Highly Optimised Branchless Binary Search for use in Lookup Tables - with the improvement of a fixed loop length for the compiler
/*
MIT License

Copyright (c) 2024 Joshua Nelson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#pragma once

#include <bit>
#include <span>
#include <vector>
#include <array>
#include <type_traits> // for std::is_const_v used in the concepts below

#ifndef BINARY_SEARCH_CRITICAL_INLINE
#  if defined(_MSC_VER)
#    define BINARY_SEARCH_CRITICAL_INLINE __forceinline
#  elif defined(__GNUC__) && __GNUC__ > 3
#    define BINARY_SEARCH_CRITICAL_INLINE inline __attribute__ ((always_inline))
#  else
#    define BINARY_SEARCH_CRITICAL_INLINE inline
#  endif
#endif
namespace Utility
{

// This just forces a fixed-size span into the compile-time unrolled binary search.
template<typename TySpan>
concept FixedSpan =
    TySpan::extent != std::dynamic_extent &&
    std::is_const_v<typename TySpan::element_type>; // Just enforces a const span.

// This forces the span into the dynamic binary search when the input size is only known at runtime.
// This is a quirk of span and templates: without the concepts it would be possible to call the fixed-N variant accidentally.
template<typename TySpan>
concept DynamicSpan =
    TySpan::extent == std::dynamic_extent &&
    std::is_const_v<typename TySpan::element_type>; // Just enforces a const span.
// This function forces bit_ceil to be evaluated at compile time. Otherwise the MSVC compiler
// will not do so, and instead emits a branch to check for the lzcnt instruction,
// in the worst case counting the leading zeros one at a time using bit shifts.
consteval size_t BitCeil( size_t n )
{
    return std::bit_ceil( n );
}

// Compile-time log2.
consteval size_t Log2( size_t n )
{
    return std::bit_width( n ) - 1;
}
// Note: this is a BinarySearch utility for lookup tables, so it aims to return the index i
// such that array[i] <= x < array[i + 1], clamped to the valid range.
// Both functions are guaranteed to return at most size - 2.
// Source: https://probablydance.com/2023/04/27/beautiful-branchless-binary-search/
// Slightly modified.
// The loop does not get unrolled if you use their implementation; however, if we calculate the number of steps
// by taking log2(N) (done using std::bit_width) and make the loop exactly that many iterations, the MSVC compiler
// will unroll it.
template<FixedSpan TySpan>
BINARY_SEARCH_CRITICAL_INLINE constexpr size_t BinarySearch( TySpan array, typename TySpan::value_type x )
{
    constexpr size_t N = TySpan::extent;
    // Just in case.
    static_assert( N != std::dynamic_extent, "Cannot call compile time size binary search with dynamic span" );
    // Compile-time optimisation if the array is 0, 1 or 2 elements long.
    // In all these cases the only valid index to return is 0.
    if constexpr ( N <= 2 )
        return 0;
    // Early bail out.
    if ( x <= array.front() )
        return 0;
    if ( x >= array.back() )
        return N - 2;
    // This is the resultant index.
    // It is the left side of the current bin being searched,
    // so we can just pretend this is a "subspan".
    size_t begin = 0;
    // This algorithm divides the space in two each time; however, in order to do this it
    // needs a clean power of 2. We find the power of 2 closest to the mid point.
    // We do this with std::bit_floor and std::bit_ceil, which return either the power of
    // two below/equal or the power of two above.
    size_t step;
    constexpr size_t N_bit_floor = std::bit_floor( N );
    if constexpr ( N_bit_floor != N )
    {
        // Check if x is left or right of the closest power of 2.
        // This does one check at the start to get us to
        // a clean power of two: either we take the side greater than
        // the power of 2 or the side less than the power of 2.
        if ( array[N_bit_floor] <= x )
        {
            // If the value at the power of two is less than or equal to x, then x is in the right
            // section, so remove everything up to the power of 2.
            constexpr size_t new_length = N - N_bit_floor - 1;
            // new_length = N - 2^floor(log2(N)) - 1, which is zero when N is exactly 3, 5, 9...
            // i.e. one greater than a power of 2.
            // If the remaining length is zero we return
            // the element before the last, similar to above.
            if ( new_length == 0 )
                return N - 2;
            // If not zero then we go from our new search position.
            // We must now offset the start position such that the step is a power of 2.
            // This means there will be overlap in the search regions, but that is the price.
            constexpr size_t ceil = BitCeil( new_length );
            step = ceil / 2; // save one division by doing this at compile time
            begin = N - 1 - ceil;
        }
        else
        {
            step = N_bit_floor / 2; // save one division by doing this at compile time
        }
    }
    else
    {
        step = N_bit_floor / 2; // save one division by doing this at compile time
    }
    // We calculate the number of steps required. This is really important
    // because otherwise the compiler will not unroll the loop.
    // This may result in one extra comparison, but compared to having a loop
    // with N comparisons it is a no brainer.
    // The algorithm always completes in at most log2(N) iterations, which is what is calculated below.
    constexpr size_t steps = Log2( N );
    for ( size_t i = 0; i < steps; i++ )
    {
        // This selects the search space by offsetting it.
        //      step          step
        // |<----------->|<----------->|
        // b-------------S-------------|
        // where b is the current begin and S = b + step.
        // If the value at S is greater than x then x is in the left region,
        // so there is no need to move b.
        // However, if the value at S is less than or equal to x then x is in the right region,
        // so we reduce the space by shifting b to where S was:
        //      step          step
        // |<----------->|<----------->|
        // --------------b------S'-----|
        // where the step size halves next round, so
        // S' = b + new step size.
        // This recursively narrows down on the target in
        // at most log2(N) steps.
        if ( x >= array[begin + step] )
            begin += step;
        // Each time we divide the step by two; this halves the distance the mid point can move each round.
        // On the first iteration it is *half* (roughly, see above) the array, then the next it is half of the remaining,
        // and so on.
        step /= 2; // we now do this afterwards to save one division at the start.
    }
    // begin has now landed on the left edge of the bin containing x, which is the final index.
    return begin;
}
// For a detailed explanation of the algorithm used see above.
// The comments here just describe the differences when using
// a runtime-sized array.
template<DynamicSpan TySpan>
BINARY_SEARCH_CRITICAL_INLINE size_t BinarySearch( TySpan array, typename TySpan::value_type x )
{
    // No need to check the array size here;
    // that check only exists in the fixed-size version as a
    // compile-time optimisation.
    if ( x <= array.front() )
        return 0;
    if ( x >= array.back() )
        return array.size() - 2;
    // This is the resultant index.
    // It is the left side of the current bin being searched.
    size_t begin = 0;
    size_t step;
    const size_t N_bit_floor = std::bit_floor( array.size() );
    if ( N_bit_floor != array.size() )
    {
        // Check if x is on the left or right of the closest power of 2.
        if ( array[N_bit_floor] <= x )
        {
            const size_t new_length = array.size() - N_bit_floor - 1;
            if ( new_length == 0 )
                return array.size() - 2; // clamp to the element before the last, matching the fixed-size version
            step = std::bit_ceil( new_length );
            begin = array.size() - 1 - step;
        }
        else
        {
            step = N_bit_floor;
        }
    }
    else
    {
        step = N_bit_floor;
    }
    // Unlike the fixed-size version we cannot calculate a fixed number of steps,
    // because the size is not known at compile time, so this stays a real loop.
    // Prefer the fixed-size overloads for better performance.
    for ( step /= 2; step != 0; step /= 2 )
    {
        if ( x >= array[begin + step] )
            begin += step;
    }
    return begin;
}
// Overloads for std::array and std::vector.
template<typename T, size_t N>
BINARY_SEARCH_CRITICAL_INLINE constexpr size_t BinarySearch( const std::array<T, N>& array, T x )
{
    return BinarySearch( std::span<const T, N>( array ), x );
}

template<typename T>
BINARY_SEARCH_CRITICAL_INLINE constexpr size_t BinarySearch( const std::vector<T>& array, T x )
{
    return BinarySearch( std::span<const T>( array ), x );
}

} // end namespace Utility
Disclaimer
This is designed for lookup tables where the entries (x) are sorted floating point values. It may work with integers, but I have no tests written for that case as it is not our use case. It also automatically clamps to the edges of the input data. The key behaviour here is that it returns at most N-2: the index is intended to be used in a lookup table where linear interpolation will be used to lerp between array[idx] and array[idx+1], so the last valid idx is N-2.
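To make that contract concrete, here is a minimal sketch (not part of the gist; Interpolate, xs and ys are hypothetical names) of how the returned index might drive a linear interpolation over a second table of values:

#include <algorithm> // std::clamp
#include <cmath>     // std::lerp (C++20)
#include <cstddef>
#include <span>

// Hypothetical helper: xs is the sorted lookup axis, ys the corresponding values, both the same length (>= 2).
double Interpolate( std::span<const double> xs, std::span<const double> ys, double x )
{
    const std::size_t idx = Utility::BinarySearch( xs, x );              // idx is at most xs.size() - 2
    const double t        = ( x - xs[idx] ) / ( xs[idx + 1] - xs[idx] ); // fraction of the way through the bin
    return std::lerp( ys[idx], ys[idx + 1], std::clamp( t, 0.0, 1.0 ) ); // the clamp handles x outside the table edges
}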
C++20
If you are not able to use C++20 then the span parameter of the first binary search (the fixed-size variant) can simply be replaced with const std::array<T,N>& and the second with const std::vector<T>&. You will also need to roll your own bit_ceil, bit_floor and bit_width; the author of the original article does this for the first two. The last one, bit_width, is required for the compile-time optimisation of unrolling.

Example Usage
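A minimal sketch of calling the overloads above (the header name BinarySearch.h and the table values are my own assumptions, not from the gist):

#include <array>
#include <cstddef>
#include <cstdio>
#include <vector>
#include "BinarySearch.h" // assumed filename for the file above

int main()
{
    // Fixed-size table: resolves to the compile-time unrolled variant via the std::array overload.
    constexpr std::array<double, 8> xs{ 0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0 };
    const std::size_t i = Utility::BinarySearch( xs, 5.0 );   // 3, because xs[3] = 4 <= 5 < xs[4] = 8

    // Runtime-sized table: resolves to the dynamic-span variant, which keeps a real loop.
    const std::vector<double> ys( xs.begin(), xs.end() );
    const std::size_t j = Utility::BinarySearch( ys, 100.0 ); // clamped to ys.size() - 2 == 6

    std::printf( "%zu %zu\n", i, j );
    return 0;
}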
Performance
This (compile-time unrolled) binary search was tested (using a double array) on my machine; it is about 2-3 times faster than both naive binary search and naive linear search for small arrays (<16 elements). For larger arrays (>100 elements) it becomes much faster than linear search and keeps its gain over naive binary search.

As stated in the comments, this is based off of the article: https://probablydance.com/2023/04/27/beautiful-branchless-binary-search/
The major improvement here is that when the table size is known at compile time (often true in our use case) the loop can be unrolled by calculating log2(N).
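To benefit from the unrolled path, the extent has to be visible to the compiler, i.e. a std::array or a fixed-extent std::span must reach the call. Below is a small sketch of checking which variant a given span type selects (the header name is again an assumption; the static_asserts just exercise the two concepts):

#include <array>
#include <cstddef>
#include <span>
#include <vector>
#include "BinarySearch.h" // assumed filename for the file above

std::size_t CheckDispatch()
{
    static std::array<double, 16> table{};
    static std::vector<double>    values( 16 );

    std::span<const double, 16> fixed( table );    // extent known at compile time
    std::span<const double>     dynamic( values ); // std::dynamic_extent

    static_assert( Utility::FixedSpan<decltype( fixed )> );     // unrolled variant will be chosen
    static_assert( Utility::DynamicSpan<decltype( dynamic )> ); // looped variant will be chosen

    return Utility::BinarySearch( fixed, 1.0 ) + Utility::BinarySearch( dynamic, 1.0 );
}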
Below are benchmarks done on my machine (i7 14700K) with randomly generated queries (the queries are the same between types) and sorted vectors. Between each test the cache was flushed to prevent test ordering from affecting the results. The length in each of the tests describes the length of the array searched. The numbers are kept quite small so as to prevent caching effects from taking hold; if you repeatedly query the same data, this BinarySearch approaches the naive implementation after >100 sequential queries. For our use case we are querying many different tables once, so it is important to measure the performance with a cold cache.
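The flushing code itself is not shown here; as an illustration only (x86-specific, and not necessarily how the numbers above were produced), one way to evict a table from the cache before each timed query is to flush every cache line it occupies:

#include <cstddef>
#include <immintrin.h> // _mm_clflush, _mm_mfence (x86 only)

// Flush every cache line occupied by a table so the next query starts cold.
// A 64-byte cache line is assumed.
inline void FlushFromCache( const double* data, std::size_t count )
{
    const char* bytes = reinterpret_cast<const char*>( data );
    for ( std::size_t offset = 0; offset < count * sizeof( double ); offset += 64 )
        _mm_clflush( bytes + offset );
    _mm_mfence(); // make sure the flushes complete before the next timed query
}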