WiwilZ · August 17, 2024 11:58
diff --git a/cast_int_and_float.cpp b/cast_int_and_float.cpp
 #if defined(__SSE2__) || defined(_MSC_VER) && !defined(__clang__) && ((defined(_M_AMD64) || defined(_M_X64)) && !defined(_M_ARM64EC) || defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP == 2)

 #   if defined(_MSC_VER) && !defined(__clang__)
 #include <intrin.h>
 #   else
 #include <emmintrin.h>
 #include <xmmintrin.h>
 #include <immintrin.h>
 #   endif


 #include <cstdint>
 #include <concepts>


 /// Converts an integer to `float` with a scalar SSE / AVX-512 conversion.
 ///
 /// The conversion is selected from the width and signedness of `T`, not from
 /// identity with the <cstdint> typedefs, so distinct types of equal width
 /// (e.g. `long` vs. `long long`, `wchar_t`, `char32_t`) are converted according
 /// to their own signedness. Types wider than 64 bits are passed through the
 /// 64-bit conversions and are therefore narrowed first.
 ///
 /// @tparam T any integral type.
 /// @param x  value to convert.
 /// @return   `x` as a `float`, rounded by the conversion instruction.
 template <std::integral T>
 float itof(T x) noexcept {
    // Everything that is exactly representable as a signed 32-bit int.
    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
 #ifdef __AVX512F__
    // AVX-512F provides direct signed and unsigned conversions for both widths.
    if constexpr (kFitsInt32) {
        return _mm_cvtss_f32(_mm_cvti32_ss(_mm_undefined_ps(), static_cast<int>(x)));
    } else if constexpr (kIsUInt32) {
        return _mm_cvtss_f32(_mm_cvtu32_ss(_mm_undefined_ps(), static_cast<unsigned>(x)));
    } else if constexpr (kIsInt64) {
        return _mm_cvtss_f32(_mm_cvti64_ss(_mm_undefined_ps(), static_cast<long long>(x)));
    } else {
        return _mm_cvtss_f32(_mm_cvtu64_ss(_mm_undefined_ps(), static_cast<unsigned long long>(x)));
    }
 #else
    if constexpr (kFitsInt32) {
        return _mm_cvtss_f32(_mm_cvtsi32_ss(_mm_undefined_ps(), static_cast<int>(x)));
    } else if constexpr (kIsUInt32 || kIsInt64) {
        // An unsigned 32-bit value always fits in a signed 64-bit integer.
        return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), static_cast<long long>(x)));
    } else {
        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
        const auto bits = static_cast<unsigned long long>(x);
        if ((bits >> 63) == 0) {
            // Top bit clear: the value is also a valid signed 64-bit integer.
            return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), static_cast<long long>(bits)));
        }
        // Top bit set: halve the value (folding the dropped bit back in as a
        // sticky bit so rounding is unaffected), convert, then double.
        const auto halved = static_cast<long long>((bits >> 1) | (bits & 1));
        const auto tmp = _mm_cvtsi64_ss(_mm_undefined_ps(), halved);
        return _mm_cvtss_f32(_mm_add_ss(tmp, tmp));
    }
 #endif
 }

 /// Converts an integer to `double` with a scalar SSE2 / AVX-512 conversion.
 ///
 /// The conversion is selected from the width and signedness of `T`, not from
 /// identity with the <cstdint> typedefs, so distinct types of equal width
 /// (e.g. `long` vs. `long long`, `wchar_t`, `char32_t`) are converted according
 /// to their own signedness. Types wider than 64 bits are passed through the
 /// 64-bit conversions and are therefore narrowed first.
 ///
 /// @tparam T any integral type.
 /// @param x  value to convert.
 /// @return   `x` as a `double`, rounded by the conversion instruction.
 template <std::integral T>
 double itod(T x) noexcept {
    // Everything that is exactly representable as a signed 32-bit int.
    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
 #ifdef __AVX512F__
    if constexpr (kFitsInt32) {
        return _mm_cvtsd_f64(_mm_cvti32_sd(_mm_undefined_pd(), static_cast<int>(x)));
    } else if constexpr (kIsInt64) {
        return _mm_cvtsd_f64(_mm_cvti64_sd(_mm_undefined_pd(), static_cast<long long>(x)));
    } else {
        // Unsigned 32-bit values are zero-extended and share the unsigned
        // 64-bit conversion with the 64-bit unsigned types.
        return _mm_cvtsd_f64(_mm_cvtu64_sd(_mm_undefined_pd(), static_cast<unsigned long long>(x)));
    }
 #else
    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
    if constexpr (kFitsInt32) {
        return _mm_cvtsd_f64(_mm_cvtsi32_sd(_mm_undefined_pd(), static_cast<int>(x)));
    } else if constexpr (kIsUInt32 || kIsInt64) {
        // An unsigned 32-bit value always fits in a signed 64-bit integer.
        return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), static_cast<long long>(x)));
    } else {
        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
        const auto bits = static_cast<unsigned long long>(x);
        if ((bits >> 63) == 0) {
            // Top bit clear: the value is also a valid signed 64-bit integer.
            return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), static_cast<long long>(bits)));
        }
        // Top bit set: halve the value (folding the dropped bit back in as a
        // sticky bit so rounding is unaffected), convert, then double.
        const auto halved = static_cast<long long>((bits >> 1) | (bits & 1));
        const auto tmp = _mm_cvtsi64_sd(_mm_undefined_pd(), halved);
        return _mm_cvtsd_f64(_mm_add_sd(tmp, tmp));
    }
 #endif
 }


 /// Converts a `float` to an integer, truncating, with a scalar SSE / AVX-512
 /// conversion.
 ///
 /// The conversion is selected from the width and signedness of `T`, not from
 /// identity with the <cstdint> typedefs, so distinct types of equal width
 /// (e.g. `long` vs. `long long`) use the conversion matching their signedness.
 /// Inputs outside the range of the chosen conversion (including NaN) yield
 /// whatever the underlying instruction produces.
 ///
 /// @tparam T integral result type (must be given explicitly).
 /// @param x  value to convert.
 /// @return   the truncated value, narrowed to `T` where `T` is smaller than
 ///           the integer produced by the instruction.
 template <std::integral T>
 T ftoi(float x) noexcept {
    // Everything that is exactly representable as a signed 32-bit int.
    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
    const auto v = _mm_set_ss(x);
 #ifdef __AVX512F__
    // AVX-512F provides direct signed and unsigned conversions for both widths.
    if constexpr (kFitsInt32) {
        return static_cast<T>(_mm_cvttss_i32(v));
    } else if constexpr (kIsUInt32) {
        return static_cast<T>(_mm_cvttss_u32(v));
    } else if constexpr (kIsInt64) {
        return static_cast<T>(_mm_cvttss_i64(v));
    } else {
        return static_cast<T>(_mm_cvttss_u64(v));
    }
 #else
    if constexpr (kFitsInt32) {
        return static_cast<T>(_mm_cvttss_si32(v));
    } else if constexpr (kIsUInt32 || kIsInt64) {
        // The unsigned 32-bit range fits inside the signed 64-bit conversion.
        return static_cast<T>(_mm_cvttss_si64(v));
    } else {
        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
        // `direct` is the plain signed conversion; its sign bit ends up set
        // when x does not fit (x >= 2^63). `shifted` converts x - 2^63.
        const auto direct = _mm_cvttss_si64(v);
        const auto biased = _mm_sub_ss(v, _mm_set_ss(9223372036854775808.0f));
        const auto shifted = _mm_cvttss_si64(biased);
        // `direct >> 63` is all ones exactly when the sign bit of `direct` is
        // set; in that case take `shifted` and OR the top bit back in.
        return static_cast<T>((shifted & (direct >> 63)) | direct);
    }
 #endif
 }

 /// Converts a `double` to an integer, truncating, with a scalar SSE2 /
 /// AVX-512 conversion.
 ///
 /// The conversion is selected from the width and signedness of `T`, not from
 /// identity with the <cstdint> typedefs, so distinct types of equal width
 /// (e.g. `long` vs. `long long`) use the conversion matching their signedness.
 /// Inputs outside the range of the chosen conversion (including NaN) yield
 /// whatever the underlying instruction produces.
 ///
 /// @tparam T integral result type (must be given explicitly).
 /// @param x  value to convert.
 /// @return   the truncated value, narrowed to `T` where `T` is smaller than
 ///           the integer produced by the instruction.
 template <std::integral T>
 T dtoi(double x) noexcept {
    // Everything that is exactly representable as a signed 32-bit int.
    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
    const auto v = _mm_set_sd(x);
 #ifdef __AVX512F__
    // AVX-512F provides direct signed and unsigned conversions for both widths.
    if constexpr (kFitsInt32) {
        return static_cast<T>(_mm_cvttsd_i32(v));
    } else if constexpr (kIsUInt32) {
        return static_cast<T>(_mm_cvttsd_u32(v));
    } else if constexpr (kIsInt64) {
        return static_cast<T>(_mm_cvttsd_i64(v));
    } else {
        return static_cast<T>(_mm_cvttsd_u64(v));
    }
 #else
    if constexpr (kFitsInt32) {
        return static_cast<T>(_mm_cvttsd_si32(v));
    } else if constexpr (kIsUInt32 || kIsInt64) {
        // The unsigned 32-bit range fits inside the signed 64-bit conversion.
        return static_cast<T>(_mm_cvttsd_si64(v));
    } else {
        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
        // `direct` is the plain signed conversion; its sign bit ends up set
        // when x does not fit (x >= 2^63). `shifted` converts x - 2^63.
        const auto direct = _mm_cvttsd_si64(v);
        const auto biased = _mm_sub_sd(v, _mm_set_sd(9223372036854775808.0));
        const auto shifted = _mm_cvttsd_si64(biased);
        // `direct >> 63` is all ones exactly when the sign bit of `direct` is
        // set; in that case take `shifted` and OR the top bit back in.
        return static_cast<T>((shifted & (direct >> 63)) | direct);
    }
 #endif
 }

 #endif
	#if defined(__SSE2__) || defined(_MSC_VER) && !defined(__clang__) && ((defined(_M_AMD64) || defined(_M_X64)) && !defined(_M_ARM64EC) || defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP == 2)

	# if defined(_MSC_VER) && !defined(__clang__)
	#include <intrin.h>
	# else
	#include <emmintrin.h>
	#include <xmmintrin.h>
	#include <immintrin.h>
	# endif


	#include <cstdint>
	#include <concepts>


	/// Converts an integer to `float` with a scalar SSE / AVX-512 conversion.
	///
	/// The conversion is selected from the width and signedness of `T`, not from
	/// identity with the <cstdint> typedefs, so distinct types of equal width
	/// (e.g. `long` vs. `long long`, `wchar_t`, `char32_t`) are converted
	/// according to their own signedness. Types wider than 64 bits are passed
	/// through the 64-bit conversions and are therefore narrowed first.
	///
	/// @tparam T any integral type.
	/// @param x  value to convert.
	/// @return   `x` as a `float`, rounded by the conversion instruction.
	template <std::integral T>
	float itof(T x) noexcept {
	    // Everything that is exactly representable as a signed 32-bit int.
	    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
	    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
	    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
	#ifdef __AVX512F__
	    // AVX-512F provides direct signed and unsigned conversions for both widths.
	    if constexpr (kFitsInt32) {
	        return _mm_cvtss_f32(_mm_cvti32_ss(_mm_undefined_ps(), static_cast<int>(x)));
	    } else if constexpr (kIsUInt32) {
	        return _mm_cvtss_f32(_mm_cvtu32_ss(_mm_undefined_ps(), static_cast<unsigned>(x)));
	    } else if constexpr (kIsInt64) {
	        return _mm_cvtss_f32(_mm_cvti64_ss(_mm_undefined_ps(), static_cast<long long>(x)));
	    } else {
	        return _mm_cvtss_f32(_mm_cvtu64_ss(_mm_undefined_ps(), static_cast<unsigned long long>(x)));
	    }
	#else
	    if constexpr (kFitsInt32) {
	        return _mm_cvtss_f32(_mm_cvtsi32_ss(_mm_undefined_ps(), static_cast<int>(x)));
	    } else if constexpr (kIsUInt32 || kIsInt64) {
	        // An unsigned 32-bit value always fits in a signed 64-bit integer.
	        return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), static_cast<long long>(x)));
	    } else {
	        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
	        const auto bits = static_cast<unsigned long long>(x);
	        if ((bits >> 63) == 0) {
	            // Top bit clear: the value is also a valid signed 64-bit integer.
	            return _mm_cvtss_f32(_mm_cvtsi64_ss(_mm_undefined_ps(), static_cast<long long>(bits)));
	        }
	        // Top bit set: halve the value (folding the dropped bit back in as a
	        // sticky bit so rounding is unaffected), convert, then double.
	        const auto halved = static_cast<long long>((bits >> 1) | (bits & 1));
	        const auto tmp = _mm_cvtsi64_ss(_mm_undefined_ps(), halved);
	        return _mm_cvtss_f32(_mm_add_ss(tmp, tmp));
	    }
	#endif
	}

	/// Converts an integer to `double` with a scalar SSE2 / AVX-512 conversion.
	///
	/// The conversion is selected from the width and signedness of `T`, not from
	/// identity with the <cstdint> typedefs, so distinct types of equal width
	/// (e.g. `long` vs. `long long`, `wchar_t`, `char32_t`) are converted
	/// according to their own signedness. Types wider than 64 bits are passed
	/// through the 64-bit conversions and are therefore narrowed first.
	///
	/// @tparam T any integral type.
	/// @param x  value to convert.
	/// @return   `x` as a `double`, rounded by the conversion instruction.
	template <std::integral T>
	double itod(T x) noexcept {
	    // Everything that is exactly representable as a signed 32-bit int.
	    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
	    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
	#ifdef __AVX512F__
	    if constexpr (kFitsInt32) {
	        return _mm_cvtsd_f64(_mm_cvti32_sd(_mm_undefined_pd(), static_cast<int>(x)));
	    } else if constexpr (kIsInt64) {
	        return _mm_cvtsd_f64(_mm_cvti64_sd(_mm_undefined_pd(), static_cast<long long>(x)));
	    } else {
	        // Unsigned 32-bit values are zero-extended and share the unsigned
	        // 64-bit conversion with the 64-bit unsigned types.
	        return _mm_cvtsd_f64(_mm_cvtu64_sd(_mm_undefined_pd(), static_cast<unsigned long long>(x)));
	    }
	#else
	    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
	    if constexpr (kFitsInt32) {
	        return _mm_cvtsd_f64(_mm_cvtsi32_sd(_mm_undefined_pd(), static_cast<int>(x)));
	    } else if constexpr (kIsUInt32 || kIsInt64) {
	        // An unsigned 32-bit value always fits in a signed 64-bit integer.
	        return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), static_cast<long long>(x)));
	    } else {
	        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
	        const auto bits = static_cast<unsigned long long>(x);
	        if ((bits >> 63) == 0) {
	            // Top bit clear: the value is also a valid signed 64-bit integer.
	            return _mm_cvtsd_f64(_mm_cvtsi64_sd(_mm_undefined_pd(), static_cast<long long>(bits)));
	        }
	        // Top bit set: halve the value (folding the dropped bit back in as a
	        // sticky bit so rounding is unaffected), convert, then double.
	        const auto halved = static_cast<long long>((bits >> 1) | (bits & 1));
	        const auto tmp = _mm_cvtsi64_sd(_mm_undefined_pd(), halved);
	        return _mm_cvtsd_f64(_mm_add_sd(tmp, tmp));
	    }
	#endif
	}


	/// Converts a `float` to an integer, truncating, with a scalar SSE / AVX-512
	/// conversion.
	///
	/// The conversion is selected from the width and signedness of `T`, not from
	/// identity with the <cstdint> typedefs, so distinct types of equal width
	/// (e.g. `long` vs. `long long`) use the conversion matching their
	/// signedness. Inputs outside the range of the chosen conversion (including
	/// NaN) yield whatever the underlying instruction produces.
	///
	/// @tparam T integral result type (must be given explicitly).
	/// @param x  value to convert.
	/// @return   the truncated value, narrowed to `T` where `T` is smaller than
	///           the integer produced by the instruction.
	template <std::integral T>
	T ftoi(float x) noexcept {
	    // Everything that is exactly representable as a signed 32-bit int.
	    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
	    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
	    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
	    const auto v = _mm_set_ss(x);
	#ifdef __AVX512F__
	    // AVX-512F provides direct signed and unsigned conversions for both widths.
	    if constexpr (kFitsInt32) {
	        return static_cast<T>(_mm_cvttss_i32(v));
	    } else if constexpr (kIsUInt32) {
	        return static_cast<T>(_mm_cvttss_u32(v));
	    } else if constexpr (kIsInt64) {
	        return static_cast<T>(_mm_cvttss_i64(v));
	    } else {
	        return static_cast<T>(_mm_cvttss_u64(v));
	    }
	#else
	    if constexpr (kFitsInt32) {
	        return static_cast<T>(_mm_cvttss_si32(v));
	    } else if constexpr (kIsUInt32 || kIsInt64) {
	        // The unsigned 32-bit range fits inside the signed 64-bit conversion.
	        return static_cast<T>(_mm_cvttss_si64(v));
	    } else {
	        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
	        // `direct` is the plain signed conversion; its sign bit ends up set
	        // when x does not fit (x >= 2^63). `shifted` converts x - 2^63.
	        const auto direct = _mm_cvttss_si64(v);
	        const auto biased = _mm_sub_ss(v, _mm_set_ss(9223372036854775808.0f));
	        const auto shifted = _mm_cvttss_si64(biased);
	        // `direct >> 63` is all ones exactly when the sign bit of `direct` is
	        // set; in that case take `shifted` and OR the top bit back in.
	        return static_cast<T>((shifted & (direct >> 63)) | direct);
	    }
	#endif
	}

	/// Converts a `double` to an integer, truncating, with a scalar SSE2 /
	/// AVX-512 conversion.
	///
	/// The conversion is selected from the width and signedness of `T`, not from
	/// identity with the <cstdint> typedefs, so distinct types of equal width
	/// (e.g. `long` vs. `long long`) use the conversion matching their
	/// signedness. Inputs outside the range of the chosen conversion (including
	/// NaN) yield whatever the underlying instruction produces.
	///
	/// @tparam T integral result type (must be given explicitly).
	/// @param x  value to convert.
	/// @return   the truncated value, narrowed to `T` where `T` is smaller than
	///           the integer produced by the instruction.
	template <std::integral T>
	T dtoi(double x) noexcept {
	    // Everything that is exactly representable as a signed 32-bit int.
	    constexpr bool kFitsInt32 = sizeof(T) < 4 || (sizeof(T) == 4 && std::signed_integral<T>);
	    constexpr bool kIsUInt32 = sizeof(T) == 4 && std::unsigned_integral<T>;
	    constexpr bool kIsInt64 = sizeof(T) > 4 && std::signed_integral<T>;
	    const auto v = _mm_set_sd(x);
	#ifdef __AVX512F__
	    // AVX-512F provides direct signed and unsigned conversions for both widths.
	    if constexpr (kFitsInt32) {
	        return static_cast<T>(_mm_cvttsd_i32(v));
	    } else if constexpr (kIsUInt32) {
	        return static_cast<T>(_mm_cvttsd_u32(v));
	    } else if constexpr (kIsInt64) {
	        return static_cast<T>(_mm_cvttsd_i64(v));
	    } else {
	        return static_cast<T>(_mm_cvttsd_u64(v));
	    }
	#else
	    if constexpr (kFitsInt32) {
	        return static_cast<T>(_mm_cvttsd_si32(v));
	    } else if constexpr (kIsUInt32 || kIsInt64) {
	        // The unsigned 32-bit range fits inside the signed 64-bit conversion.
	        return static_cast<T>(_mm_cvttsd_si64(v));
	    } else {
	        // Unsigned 64-bit: only a signed 64-bit conversion is available here.
	        // `direct` is the plain signed conversion; its sign bit ends up set
	        // when x does not fit (x >= 2^63). `shifted` converts x - 2^63.
	        const auto direct = _mm_cvttsd_si64(v);
	        const auto biased = _mm_sub_sd(v, _mm_set_sd(9223372036854775808.0));
	        const auto shifted = _mm_cvttsd_si64(biased);
	        // `direct >> 63` is all ones exactly when the sign bit of `direct` is
	        // set; in that case take `shifted` and OR the top bit back in.
	        return static_cast<T>((shifted & (direct >> 63)) | direct);
	    }
	#endif
	}

	#endif
No results found