Last active
August 17, 2024 11:58
-
-
Save WiwilZ/4bfd3ff56203bdabdd3735628506df3e to your computer and use it in GitHub Desktop.
Cast functions between integer and floating-point types using Intel intrinsics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #if defined(__SSE2__) || defined(_MSC_VER) && !defined(__clang__) && ((defined(_M_AMD64) || defined(_M_X64)) && !defined(_M_ARM64EC) || defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP == 2) | |
| # if defined(_MSC_VER) && !defined(__clang__) | |
| #include <intrin.h> | |
| # else | |
| #include <emmintrin.h> | |
| #include <xmmintrin.h> | |
| #include <immintrin.h> | |
| # endif | |
| #include <cstdint> | |
| #include <concepts> | |
| template <std::integral T> | |
| float itof(T x) noexcept { | |
| #ifdef __AVX512F__ | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvtss_f32(_mm_cvti32_ss(_mm_undefined_ps(), x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t>) { | |
| return _mm_cvtss_f32(_mm_cvtu32_ss(_mm_undefined_ps(), x)); | |
| } else if constexpr (std::is_same_v<T, int64_t>) { | |
| return _mm_cvtss_f32(_mm_cvti64_ss(_mm_undefined_ps(), x)); | |
| } else { | |
| return _mm_cvtss_f32(_mm_cvtu64_ss(_mm_undefined_ps(), x)); | |
| } | |
| #else | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvtss_f32(_mm_cvtsi32_ss(_mm_undefined_ps(), x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t> || std::is_same_v<T, int64_t>) { | |
| return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), x)); | |
| } else { | |
| if ((x & 0x8000000000000000) == 0) { | |
| return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), x)); | |
| } | |
| const auto tmp = _mm_cvtsi64_ss(_mm_undefined_ps(), (x >> 1) | (x & 1)); | |
| return _mm_cvtss_f32(_mm_add_ss(tmp, tmp)); | |
| } | |
| #endif | |
| } | |
| template <std::integral T> | |
| double itod(T x) noexcept { | |
| #ifdef __AVX512F__ | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvtsd_f64(_mm_cvti32_sd(_mm_undefined_pd(), x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t>) { | |
| return _mm_cvtsd_f64(_mm_cvtu64_sd(_mm_undefined_pd(), x)); | |
| } else if constexpr (std::is_same_v<T, int64_t>) { | |
| return _mm_cvtsd_f64(_mm_cvti64_sd(_mm_undefined_pd(), x)); | |
| } else { | |
| return _mm_cvtsd_f64(_mm_cvtu64_sd(_mm_undefined_pd(), x)); | |
| } | |
| #else | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvtsd_f64(_mm_cvtsi32_sd(_mm_undefined_pd(), x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t> || std::is_same_v<T, int64_t>) { | |
| return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), x)); | |
| } else { | |
| if ((x & 0x8000000000000000) == 0) { | |
| return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), x)); | |
| } | |
| const auto tmp = _mm_cvtsi64_sd(_mm_undefined_pd(), (x >> 1) | (x & 1)); | |
| return _mm_cvtsd_f64(_mm_add_sd(tmp, tmp)); | |
| } | |
| #endif | |
| } | |
| template <std::integral T> | |
| T ftoi(float x) noexcept { | |
| #ifdef __AVX512F__ | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvttss_i32(_mm_set_ss(x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t>) { | |
| return _mm_cvttss_u32(_mm_set_ss(x)); | |
| } else if constexpr (std::is_same_v<T, int64_t>) { | |
| return _mm_cvttss_i64(_mm_set_ss(x)); | |
| } else { | |
| return _mm_cvttss_u64(_mm_set_ss(x)); | |
| } | |
| #else | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvttss_si32(_mm_set_ss(x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t> || std::is_same_v<T, int64_t>) { | |
| return _mm_cvttss_si64(_mm_set_ss(x)); | |
| } else { | |
| const auto a = _mm_cvttss_si64(_mm_set_ss(x)); | |
| const auto b = _mm_sub_ss(_mm_set_ss(x), _mm_set_ss(9.22337203E+18)); | |
| const auto c = _mm_cvttss_si64(b); | |
| return (c & (a >> 63)) | a; | |
| } | |
| #endif | |
| } | |
| template <std::integral T> | |
| T dtoi(double x) noexcept { | |
| #ifdef __AVX512F__ | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvttsd_i32(_mm_set_sd(x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t>) { | |
| return _mm_cvttsd_u32(_mm_set_sd(x)); | |
| } else if constexpr (std::is_same_v<T, int64_t>) { | |
| return _mm_cvttsd_i64(_mm_set_sd(x)); | |
| } else { | |
| return _mm_cvttsd_u64(_mm_set_sd(x)); | |
| } | |
| #else | |
| if constexpr (sizeof(T) <= 2 || std::is_same_v<T, int32_t>) { | |
| return _mm_cvttsd_si32(_mm_set_sd(x)); | |
| } else if constexpr (std::is_same_v<T, uint32_t> || std::is_same_v<T, int64_t>) { | |
| return _mm_cvttsd_si64(_mm_set_sd(x)); | |
| } else { | |
| const auto a = _mm_cvttsd_si64(_mm_set_sd(x)); | |
| const auto b = _mm_sub_sd(_mm_set_sd(x), _mm_set_sd(9.2233720368547758E+18)); | |
| const auto c = _mm_cvttsd_si64(b); | |
| return (c & (a >> 63)) | a; | |
| } | |
| #endif | |
| } | |
| #endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment