Skip to content

Instantly share code, notes, and snippets.

@seantalts
Created November 13, 2025 22:56
Show Gist options
  • Select an option

  • Save seantalts/5845e01d3875aee28a53aa4f8501df7b to your computer and use it in GitHub Desktop.

Select an option

Save seantalts/5845e01d3875aee28a53aa4f8501df7b to your computer and use it in GitHub Desktop.
rsqrt
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include <float.h>
/* Compiler vector-extension types, each 32 bytes wide (vector_size(32)).
 * With 4-byte elements that is 8 lanes per vector -- assumes float, int and
 * unsigned int are all 4 bytes on the target. */
typedef float Vec8f __attribute__((vector_size(32)));
typedef int Vec8i __attribute__((vector_size(32)));
typedef unsigned int Vec8u __attribute__((vector_size(32)));
/*
 * Lane-wise reciprocal square root: returns 1 / sqrt(x) for every lane of x.
 *
 * Precise floating-point semantics are switched off for this function body
 * through the float_control pragma. No lane is special-cased here, so callers
 * that need well-defined results for particular inputs must patch those lanes
 * themselves.
 */
static inline Vec8f rsqrt_fast_internal(Vec8f x) {
    /* NOTE(review): presumably a Clang pragma that has to open the compound
     * statement -- keep it ahead of any declaration; confirm in compiler docs. */
    _Pragma("float_control(precise, off)")
    const Vec8f root = __builtin_elementwise_sqrt(x);
    return 1.0f / root;
}
/*
 * Bitwise blend of two vectors.
 *
 * For every bit set in mask the corresponding bit of a is taken; for every
 * clear bit the corresponding bit of b is taken. With lanes that are either
 * all-ones or all-zeros this picks whole lanes from a or b.
 */
static inline Vec8u select(Vec8i mask, Vec8u a, Vec8u b) {
    const Vec8u bits = (Vec8u)mask;
    const Vec8u taken_from_a = bits & a;
    const Vec8u taken_from_b = ~bits & b;
    return taken_from_a | taken_from_b;
}
/*
 * Reciprocal square root of eight floats with explicit handling of the lanes
 * the fast path is not trusted with.
 *
 * Every lane is first computed by rsqrt_fast_internal(); two bitwise selects
 * then overwrite the special lanes, decided purely from the input bit pattern:
 *
 *   - biased exponent == 0 (zero or denormal, either sign):
 *         infinity carrying the sign of the input, i.e. +0 -> +inf and
 *         -0 -> -inf. Denormals are deliberately treated like a zero of the
 *         same sign.
 *   - +infinity: +0
 *   - -infinity: quiet NaN (square root of a negative value)
 *
 * All other lanes (normal numbers, NaN) keep whatever the fast path produced.
 *
 * x: input vector.
 * Returns the per-lane results as a Vec8f.
 */
Vec8f rsqrt_safe_v8(Vec8f x) {
    const Vec8f y_fast = rsqrt_fast_internal(x);

    /* Reinterpret the float lanes as raw IEEE-754 binary32 bit patterns. */
    const Vec8u u = (Vec8u)x;
    const Vec8u sign = u & 0x80000000u;
    const Vec8u abs_u = u & 0x7FFFFFFFu;
    /* The sign bit is already cleared, so the shift leaves only the 8 exponent bits. */
    const Vec8u exponent = abs_u >> 23;

    const Vec8i mask_zero = (Vec8i)(exponent == 0);
    const Vec8i mask_inf = (Vec8i)(abs_u == 0x7F800000u);

    /* Zero/denormal lanes: infinity with the input's sign bit preserved. */
    const Vec8u val_for_zero = sign | 0x7F800000u;

    /* Infinite lanes: +inf -> +0, -inf -> quiet NaN. The comparison yields
     * all-ones in negative lanes, so the AND keeps the NaN pattern only there
     * and no third select is needed. */
    const Vec8u negative_lanes = (Vec8u)((Vec8i)u < 0);
    const Vec8u val_for_inf = negative_lanes & 0x7FC00000u;

    /* Explicit bit-cast of the float result: select() works on Vec8u and an
     * implicit float-vector -> integer-vector conversion is not portable. */
    Vec8u result = select(mask_zero, val_for_zero, (Vec8u)y_fast);
    result = select(mask_inf, val_for_inf, result);
    return (Vec8f)result;
}
// ==========================================
// Test Harness
// ==========================================
/*
 * Print one result lane as both a decimal float and its raw 32-bit pattern.
 *
 * i:    lane index shown in the output.
 * name: label for the lane; printed left-aligned and padded to 10 columns.
 * val:  value to print.
 */
void print_lane(int i, const char *name, float val) {
    /* Union-based type pun to read the bit pattern of the float. */
    union { float f; uint32_t u; } bits;
    bits.f = val;
    /* %X requires an unsigned int argument and uint32_t need not be that exact
     * type, so cast explicitly; the float is promoted to double on purpose. */
    printf("Lane %d [%-10s]: %15e (Hex: 0x%08X)\n",
           i, name, (double)val, (unsigned int)bits.u);
}
/*
 * Test driver: feeds one vector of representative inputs (normal, signed
 * zeros, signed infinities, NaN, a denormal, a negative normal) through
 * rsqrt_safe_v8() and prints every output lane.
 */
int main(void) {
    enum { LANES = 8 };

    printf("Running Vector RSQRT Tests (2-Select Optimized)...\n\n");

    /* volatile keeps the compiler from constant-folding the whole test. */
    volatile float inputs[LANES] = {
        4.0f, 0.0f, -0.0f, INFINITY, -INFINITY, NAN, FLT_MIN * 0.5f, -4.0f
    };

    /* Read each element with a genuine volatile access into a plain array.
     * Handing the volatile array to memcpy through a cast that drops the
     * qualifier would access a volatile object via a non-volatile lvalue,
     * which is undefined behaviour. */
    float staged[LANES];
    for (int i = 0; i < LANES; i++) {
        staged[i] = inputs[i];
    }

    Vec8f x;
    __builtin_memcpy(&x, staged, sizeof(x));

    const Vec8f result = rsqrt_safe_v8(x);

    float outputs[LANES];
    __builtin_memcpy(outputs, &result, sizeof(outputs));

    static const char *const names[LANES] = {
        "Normal 4.0", "Zero (+)", "Zero (-)", "Inf (+)",
        "Inf (-)", "NaN", "Denormal", "Neg Normal"
    };

    for (int i = 0; i < LANES; i++) {
        print_lane(i, names[i], outputs[i]);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment