Created
November 13, 2025 22:56
-
-
Save seantalts/5845e01d3875aee28a53aa4f8501df7b to your computer and use it in GitHub Desktop.
rsqrt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <math.h> | |
| #include <float.h> | |
/*
 * Eight-lane vector types built on the GCC/Clang vector_size attribute.
 * Each type holds eight 4-byte elements (32 bytes total), so values of one
 * type can be bit-reinterpreted as another with a plain vector cast.
 */
typedef float Vec8f __attribute__((vector_size(8 * sizeof(float))));
typedef int Vec8i __attribute__((vector_size(8 * sizeof(int))));
typedef unsigned int Vec8u __attribute__((vector_size(8 * sizeof(unsigned int))));
| static inline Vec8f rsqrt_fast_internal(Vec8f x) { | |
| _Pragma("float_control(precise, off)") | |
| return 1.0f / __builtin_elementwise_sqrt(x); | |
| } | |
| static inline Vec8u select(Vec8i mask, Vec8u a, Vec8u b) { | |
| return ((Vec8u)mask & a) | (~(Vec8u)mask & b); | |
| } | |
| Vec8f rsqrt_safe_v8(Vec8f x) { | |
| const Vec8f y_fast = rsqrt_fast_internal(x); | |
| const Vec8u u = (Vec8u)x; | |
| const Vec8u abs_u = u & 0x7FFFFFFF; | |
| const Vec8u exponent = (abs_u >> 23) & 0xFF; | |
| const Vec8i mask_zero = (Vec8i)(exponent == 0); | |
| const Vec8i mask_inf = (Vec8i)(abs_u == 0x7F800000); | |
| const Vec8u val_for_zero = (Vec8u){0} + 0x7F800000; // +inf | |
| const Vec8u val_for_inf = (Vec8u){0}; | |
| Vec8u result = select(mask_zero, val_for_zero, y_fast); | |
| result = select(mask_inf, val_for_inf, result); | |
| return (Vec8f)result; | |
| } | |
| // ========================================== | |
| // Test Harness | |
| // ========================================== | |
/*
 * Print one result lane as "Lane <i> [<name>]: <value> (Hex: 0x<bits>)".
 *
 * @param i     lane index shown in the output
 * @param name  label for the lane, left-justified in a 10-character field
 * @param val   value to print, both in %e notation and as its raw 32-bit
 *              pattern in upper-case hex
 */
void print_lane(int i, const char *name, float val) {
    /* Union type-pun to get at the float's bit pattern. */
    union { float f; uint32_t u; } conv;
    conv.f = val;
    /* %X expects unsigned int; uint32_t is not guaranteed to be that type. */
    printf("Lane %d [%-10s]: %15e (Hex: 0x%08X)\n", i, name, val, (unsigned int)conv.u);
}
| int main() { | |
| printf("Running Vector RSQRT Tests (2-Select Optimized)...\n\n"); | |
| volatile float inputs[8] = { | |
| 4.0f, 0.0f, -0.0f, INFINITY, -INFINITY, NAN, FLT_MIN * 0.5f, -4.0f | |
| }; | |
| Vec8f x; | |
| __builtin_memcpy(&x, (void*)inputs, sizeof(x)); | |
| Vec8f result = rsqrt_safe_v8(x); | |
| float outputs[8]; | |
| __builtin_memcpy(outputs, &result, sizeof(outputs)); | |
| const char* names[] = { | |
| "Normal 4.0", "Zero (+)", "Zero (-)", "Inf (+)", | |
| "Inf (-)", "NaN", "Denormal", "Neg Normal" | |
| }; | |
| for(int i=0; i<8; i++) print_lane(i, names[i], outputs[i]); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment