typedef struct {
U32 elements;
S32 position;
} Cache;
void
CacheInit(Cache* cache)
{
ASSERT(cache);
cache->elements = 0;
cache->position = 0;
}
U8
CachePeek(Cache* cache, S32 index)
{
ASSERT(cache);
ASSERT(index >= 0 && index <= 3);
return cache->elements >> (index << 3);
}
void
CachePush(Cache* cache, U8 element)
{
ASSERT(cache);
const U32 kOffset = cache->position << 3;
cache->elements &= ~(0xff << kOffset);
cache->elements |= element << kOffset;
cache->position = (cache->position + 1) & 3;
}
// Returns the index of element if cached, 4 otherwise.
S32
CacheLookup(Cache* cache, U8 element)
{
ASSERT(cache);
// Use SIMD Within A Register (SWAR) to efficiently find a byte within a 32-bit word. First, set
// matching bytes to 0 by XORing with value splat over a word. Then, set least significant bits
// of 0 bytes (http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord). Finally, count
// trailing bits to get the index. ORing kLsbs with 2^32 allows to avoid the undefined behavior
// of __builtin_ctzll() with 0 (without branching) and to return 4 when the value is not found.
const U32 kZeros = cache->elements ^ (element | element << 8 | element << 16 | element << 24);
const U32 kLsbs = (kZeros - 0x01010101) & ~kZeros & 0x80808080;
return __builtin_ctzll(kLsbs | 0x100000000) >> 3;
}
Created
May 22, 2020 13:09
-
-
Save loicmolinari/7846f5197f02c87e764115e34c7f1114 to your computer and use it in GitHub Desktop.
Branch free 4xU8 elements ring buffer cache
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment