Skip to content

Instantly share code, notes, and snippets.

@WiwilZ
Last active August 17, 2024 11:58
Show Gist options
  • Select an option

  • Save WiwilZ/4bfd3ff56203bdabdd3735628506df3e to your computer and use it in GitHub Desktop.

Select an option

Save WiwilZ/4bfd3ff56203bdabdd3735628506df3e to your computer and use it in GitHub Desktop.
Cast functions between integer and floating-point types using Intel intrinsics
#if defined(__SSE2__) || defined(_MSC_VER) && !defined(__clang__) && ((defined(_M_AMD64) || defined(_M_X64)) && !defined(_M_ARM64EC) || defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP == 2)
# if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
# else
#include <emmintrin.h>
#include <xmmintrin.h>
#include <immintrin.h>
# endif
#include <cstdint>
#include <concepts>
/// @brief Converts an integral value to `float` with a scalar SSE / AVX-512
///        conversion instruction.
///
/// The instruction is chosen from the *width and signedness* of `T`, never
/// from its exact identity. Same-width types that are distinct from the
/// `<cstdint>` aliases (e.g. `long long` on LP64, where `int64_t` is `long`)
/// therefore take the signed path instead of being treated as unsigned 64-bit.
///
/// The 64-bit conversion intrinsics are only provided for x86-64 targets.
///
/// @tparam T  Integral type of at most 64 bits.
/// @param  x  Value to convert.
/// @return    `x` as a `float`.
template <std::integral T>
float itof(T x) noexcept {
    static_assert(sizeof(T) <= 8, "itof supports integral types of at most 64 bits");

    // Every value of such a type is representable as a signed 32-bit integer.
    constexpr bool fits_i32 = sizeof(T) <= 2 || (sizeof(T) == 4 && std::signed_integral<T>);

#ifdef __AVX512F__
    // AVX-512F has a native instruction for every width/signedness combination.
    if constexpr (fits_i32) {
        return _mm_cvtss_f32(_mm_cvti32_ss(_mm_undefined_ps(), x));
    } else if constexpr (sizeof(T) == 4) {  // unsigned 32-bit
        return _mm_cvtss_f32(_mm_cvtu32_ss(_mm_undefined_ps(), x));
    } else if constexpr (std::signed_integral<T>) {  // signed 64-bit
        return _mm_cvtss_f32(_mm_cvti64_ss(_mm_undefined_ps(), x));
    } else {  // unsigned 64-bit
        return _mm_cvtss_f32(_mm_cvtu64_ss(_mm_undefined_ps(), x));
    }
#else
    if constexpr (fits_i32) {
        return _mm_cvtss_f32(_mm_cvtsi32_ss(_mm_undefined_ps(), x));
    } else if constexpr (sizeof(T) == 4 || std::signed_integral<T>) {
        // Unsigned 32-bit values widen losslessly to signed 64-bit.
        return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), x));
    } else {
        // Unsigned 64-bit: SSE only offers a signed conversion.
        if ((x >> 63) == 0) {
            // Top bit clear: the value is a valid non-negative signed integer.
            return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), x));
        }
        // Top bit set: halve the value so it fits, keeping the dropped low bit
        // as a sticky bit so rounding is unaffected, then double the result.
        const __m128 half = _mm_cvtsi64_ss(_mm_undefined_ps(), (x >> 1) | (x & 1));
        return _mm_cvtss_f32(_mm_add_ss(half, half));
    }
#endif
}
/// @brief Converts an integral value to `double` with a scalar SSE2 / AVX-512
///        conversion instruction.
///
/// The instruction is chosen from the *width and signedness* of `T`, never
/// from its exact identity. Same-width types that are distinct from the
/// `<cstdint>` aliases (e.g. `long long` on LP64, where `int64_t` is `long`)
/// therefore take the signed path instead of being treated as unsigned 64-bit.
///
/// The 64-bit conversion intrinsics are only provided for x86-64 targets.
///
/// @tparam T  Integral type of at most 64 bits.
/// @param  x  Value to convert.
/// @return    `x` as a `double`.
template <std::integral T>
double itod(T x) noexcept {
    static_assert(sizeof(T) <= 8, "itod supports integral types of at most 64 bits");

    // Every value of such a type is representable as a signed 32-bit integer.
    constexpr bool fits_i32 = sizeof(T) <= 2 || (sizeof(T) == 4 && std::signed_integral<T>);

#ifdef __AVX512F__
    // AVX-512F has a native instruction for every width/signedness combination.
    if constexpr (fits_i32) {
        return _mm_cvtsd_f64(_mm_cvti32_sd(_mm_undefined_pd(), x));
    } else if constexpr (sizeof(T) == 4) {  // unsigned 32-bit
        return _mm_cvtsd_f64(_mm_cvtu32_sd(_mm_undefined_pd(), x));
    } else if constexpr (std::signed_integral<T>) {  // signed 64-bit
        return _mm_cvtsd_f64(_mm_cvti64_sd(_mm_undefined_pd(), x));
    } else {  // unsigned 64-bit
        return _mm_cvtsd_f64(_mm_cvtu64_sd(_mm_undefined_pd(), x));
    }
#else
    if constexpr (fits_i32) {
        return _mm_cvtsd_f64(_mm_cvtsi32_sd(_mm_undefined_pd(), x));
    } else if constexpr (sizeof(T) == 4 || std::signed_integral<T>) {
        // Unsigned 32-bit values widen losslessly to signed 64-bit.
        return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), x));
    } else {
        // Unsigned 64-bit: SSE2 only offers a signed conversion.
        if ((x >> 63) == 0) {
            // Top bit clear: the value is a valid non-negative signed integer.
            return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), x));
        }
        // Top bit set: halve the value so it fits, keeping the dropped low bit
        // as a sticky bit so rounding is unaffected, then double the result.
        const __m128d half = _mm_cvtsi64_sd(_mm_undefined_pd(), (x >> 1) | (x & 1));
        return _mm_cvtsd_f64(_mm_add_sd(half, half));
    }
#endif
}
/// @brief Converts a `float` to an integral type, truncating toward zero, with
///        a scalar SSE / AVX-512 conversion instruction.
///
/// The instruction is chosen from the *width and signedness* of `T`, never
/// from its exact identity. Signed 64-bit types that are distinct from
/// `int64_t` (e.g. `long long` on LP64) therefore use the signed conversion
/// rather than the unsigned one.
///
/// The 64-bit conversion intrinsics are only provided for x86-64 targets.
/// Inputs that are NaN or outside the range of `T` produce whatever the
/// underlying instruction yields; callers should not rely on that value.
///
/// @tparam T  Integral result type of at most 64 bits.
/// @param  x  Value to convert.
/// @return    `x` truncated toward zero, as a `T`.
template <std::integral T>
T ftoi(float x) noexcept {
    static_assert(sizeof(T) <= 8, "ftoi supports integral types of at most 64 bits");

    // Every value of such a type is representable as a signed 32-bit integer.
    constexpr bool fits_i32 = sizeof(T) <= 2 || (sizeof(T) == 4 && std::signed_integral<T>);
    const __m128 v = _mm_set_ss(x);

#ifdef __AVX512F__
    // AVX-512F has a native instruction for every width/signedness combination.
    if constexpr (fits_i32) {
        return static_cast<T>(_mm_cvttss_i32(v));
    } else if constexpr (sizeof(T) == 4) {  // unsigned 32-bit
        return static_cast<T>(_mm_cvttss_u32(v));
    } else if constexpr (std::signed_integral<T>) {  // signed 64-bit
        return static_cast<T>(_mm_cvttss_i64(v));
    } else {  // unsigned 64-bit
        return static_cast<T>(_mm_cvttss_u64(v));
    }
#else
    if constexpr (fits_i32) {
        return static_cast<T>(_mm_cvttss_si32(v));
    } else if constexpr (sizeof(T) == 4 || std::signed_integral<T>) {
        // Unsigned 32-bit results are taken from the low half of a signed
        // 64-bit conversion, which covers their whole range.
        return static_cast<T>(_mm_cvttss_si64(v));
    } else {
        // Unsigned 64-bit: SSE only offers a signed conversion.
        // `direct` is already correct when x < 2^63. Otherwise the signed
        // conversion overflows and returns a value with the sign bit set.
        const long long direct = _mm_cvttss_si64(v);
        // Conversion of (x - 2^63): the low 63 bits of the result when x >= 2^63.
        const long long rebased =
            _mm_cvttss_si64(_mm_sub_ss(v, _mm_set_ss(9223372036854775808.0f)));
        // `direct >> 63` is an arithmetic shift: all ones iff `direct` overflowed,
        // selecting `rebased`; OR-ing `direct` back in restores bit 63.
        return static_cast<T>((rebased & (direct >> 63)) | direct);
    }
#endif
}
/// @brief Converts a `double` to an integral type, truncating toward zero,
///        with a scalar SSE2 / AVX-512 conversion instruction.
///
/// The instruction is chosen from the *width and signedness* of `T`, never
/// from its exact identity. Signed 64-bit types that are distinct from
/// `int64_t` (e.g. `long long` on LP64) therefore use the signed conversion
/// rather than the unsigned one.
///
/// The 64-bit conversion intrinsics are only provided for x86-64 targets.
/// Inputs that are NaN or outside the range of `T` produce whatever the
/// underlying instruction yields; callers should not rely on that value.
///
/// @tparam T  Integral result type of at most 64 bits.
/// @param  x  Value to convert.
/// @return    `x` truncated toward zero, as a `T`.
template <std::integral T>
T dtoi(double x) noexcept {
    static_assert(sizeof(T) <= 8, "dtoi supports integral types of at most 64 bits");

    // Every value of such a type is representable as a signed 32-bit integer.
    constexpr bool fits_i32 = sizeof(T) <= 2 || (sizeof(T) == 4 && std::signed_integral<T>);
    const __m128d v = _mm_set_sd(x);

#ifdef __AVX512F__
    // AVX-512F has a native instruction for every width/signedness combination.
    if constexpr (fits_i32) {
        return static_cast<T>(_mm_cvttsd_i32(v));
    } else if constexpr (sizeof(T) == 4) {  // unsigned 32-bit
        return static_cast<T>(_mm_cvttsd_u32(v));
    } else if constexpr (std::signed_integral<T>) {  // signed 64-bit
        return static_cast<T>(_mm_cvttsd_i64(v));
    } else {  // unsigned 64-bit
        return static_cast<T>(_mm_cvttsd_u64(v));
    }
#else
    if constexpr (fits_i32) {
        return static_cast<T>(_mm_cvttsd_si32(v));
    } else if constexpr (sizeof(T) == 4 || std::signed_integral<T>) {
        // Unsigned 32-bit results are taken from the low half of a signed
        // 64-bit conversion, which covers their whole range.
        return static_cast<T>(_mm_cvttsd_si64(v));
    } else {
        // Unsigned 64-bit: SSE2 only offers a signed conversion.
        // `direct` is already correct when x < 2^63. Otherwise the signed
        // conversion overflows and returns a value with the sign bit set.
        const long long direct = _mm_cvttsd_si64(v);
        // Conversion of (x - 2^63): the low 63 bits of the result when x >= 2^63.
        const long long rebased =
            _mm_cvttsd_si64(_mm_sub_sd(v, _mm_set_sd(9223372036854775808.0)));
        // `direct >> 63` is an arithmetic shift: all ones iff `direct` overflowed,
        // selecting `rebased`; OR-ing `direct` back in restores bit 63.
        return static_cast<T>((rebased & (direct >> 63)) | direct);
    }
#endif
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment