Last active
January 26, 2026 04:06
-
-
Save nmoinvaz/2c7a3076049b96b8df4510c455f44720 to your computer and use it in GitHub Desktop.
Benchmark count matching bytes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <benchmark/benchmark.h> | |
| #include <cstdint> | |
| static inline uint32_t count_matching_bytes_ctzll(uint64_t mask) { | |
| return __builtin_ctzll(mask); | |
| } | |
| static inline uint32_t count_matching_bytes_ctz32(uint64_t mask) { | |
| uint32_t lo = (uint32_t)mask; | |
| if (lo) | |
| return __builtin_ctz(lo); | |
| uint32_t hi = (uint32_t)(mask >> 32); | |
| return 32 + __builtin_ctz(hi); | |
| } | |
| static inline uint32_t count_matching_bytes_debruijn(uint64_t mask) { | |
| static const uint8_t debruijn_ctz64[64] = { | |
| 63, 0, 1, 52, 2, 6, 53, 26, 3, 37, 40, 7, 33, 54, 47, 27, | |
| 61, 4, 38, 45, 43, 41, 21, 8, 23, 34, 58, 55, 48, 17, 28, 10, | |
| 62, 51, 5, 25, 36, 39, 32, 46, 60, 44, 42, 20, 22, 57, 16, 9, | |
| 50, 24, 35, 31, 59, 19, 56, 15, 49, 30, 18, 14, 29, 13, 12, 11 | |
| }; | |
| return debruijn_ctz64[((mask & (uint64_t)(-(int64_t)mask)) * 0x045FBAC7992A70DAULL) >> 58]; | |
| } | |
| static const uint64_t test_masks[] = { | |
| 0x0000000000000001ULL, 0x0000000000000100ULL, 0x0000000000010000ULL, | |
| 0x0000000001000000ULL, 0x0000000100000000ULL, 0x0000010000000000ULL, | |
| 0x0001000000000000ULL, 0x0100000000000000ULL, 0x00000000FFFFFFFFULL, | |
| 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFFULL, 0x8000000000000000ULL, | |
| 0xAAAAAAAAAAAAAAAAULL, 0x5555555555555555ULL, 0x00000000000000FFULL, | |
| 0xFF00000000000000ULL, | |
| }; | |
| static const size_t num_masks = sizeof(test_masks) / sizeof(test_masks[0]); | |
| static void BM_ctzll(benchmark::State& state) { | |
| for (auto _ : state) | |
| for (size_t i = 0; i < num_masks; i++) | |
| benchmark::DoNotOptimize(count_matching_bytes_ctzll(test_masks[i])); | |
| } | |
| BENCHMARK(BM_ctzll); | |
| static void BM_ctz32_split(benchmark::State& state) { | |
| for (auto _ : state) | |
| for (size_t i = 0; i < num_masks; i++) | |
| benchmark::DoNotOptimize(count_matching_bytes_ctz32(test_masks[i])); | |
| } | |
| BENCHMARK(BM_ctz32_split); | |
| static void BM_debruijn(benchmark::State& state) { | |
| for (auto _ : state) | |
| for (size_t i = 0; i < num_masks; i++) | |
| benchmark::DoNotOptimize(count_matching_bytes_debruijn(test_masks[i])); | |
| } | |
| BENCHMARK(BM_debruijn); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment