Created
December 3, 2025 03:20
-
-
Save CAFxX/3a658ef94bb68048e7fcab77f7e21b50 to your computer and use it in GitHub Desktop.
interncache
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package interncache | |
| import ( | |
| "encoding/binary" | |
| "iter" | |
| "maps" | |
| "math" | |
| "math/bits" | |
| "math/rand/v2" | |
| "sync/atomic" | |
| "unsafe" | |
| ) | |
| type InternCache[K comparable, V any] struct { | |
| mp unsafe.Pointer // map[K]V (see pmapToMap/mapToPmap) | |
| maxSize int | |
| } | |
| func NewInternCache[K comparable, V any](maxSize int) *InternCache[K, V] { | |
| return &InternCache[K, V]{ | |
| maxSize: maxSize, | |
| mp: mapToPmap(make(map[K]V, maxSize)), | |
| } | |
| } | |
| func (ic *InternCache[K, V]) Shard() *InternCacheShard[K, V] { | |
| return &InternCacheShard[K, V]{ | |
| ic: ic, | |
| m: make(map[K]V), | |
| rndstate: 1, | |
| } | |
| } | |
| func (ic *InternCache[K, V]) get(k K) (V, bool) { | |
| m := ic.loadMap() | |
| v, ok := m[k] | |
| return v, ok | |
| } | |
| func pmapToMap[K comparable, V any](p unsafe.Pointer) map[K]V { | |
| var m map[K]V | |
| *(*unsafe.Pointer)(unsafe.Pointer(&m)) = p | |
| return m | |
| } | |
| func mapToPmap[K comparable, V any](m map[K]V) unsafe.Pointer { | |
| return *(*unsafe.Pointer)(unsafe.Pointer(&m)) | |
| } | |
| func (ic *InternCache[K, V]) loadMap() map[K]V { | |
| // TODO: can we relax this atomic load? | |
| return pmapToMap[K, V](atomic.LoadPointer(&ic.mp)) | |
| } | |
| func (ic *InternCache[K, V]) setMap(m map[K]V) { | |
| atomic.StorePointer(&ic.mp, mapToPmap(m)) | |
| } | |
| func (ic *InternCache[K, V]) casMap(om, nm map[K]V) bool { | |
| return atomic.CompareAndSwapPointer(&ic.mp, mapToPmap(om), mapToPmap(nm)) | |
| } | |
| type InternCacheShard[K comparable, V any] struct { | |
| ic *InternCache[K, V] | |
| m map[K]V | |
| rndstate uint64 | |
| hit uint32 | |
| get uint32 | |
| } | |
| func (ics *InternCacheShard[K, V]) Get(k K) (v V, ok bool) { | |
| if ics.skipRandom() { | |
| var zero V | |
| return zero, false | |
| } | |
| if v, ok = ics.getLocal(k); !ok { | |
| v, ok = ics.ic.get(k) | |
| if ok { | |
| ics.Set(k, v) | |
| } | |
| } | |
| ics.skipUpdateState(ok) | |
| return v, ok | |
| } | |
| func (ics *InternCacheShard[K, V]) skipUpdateState(ok bool) { | |
| const scaleThr = 1000 | |
| ics.get++ | |
| if ok { | |
| ics.hit++ | |
| } | |
| if ics.get >= scaleThr { | |
| ics.get /= 2 | |
| ics.hit /= 2 | |
| } | |
| } | |
| func (ics *InternCacheShard[K, V]) skipRandom() (skip bool) { | |
| return ics.rnd(ics.get+1) > ics.hit+1 | |
| } | |
| func (ics *InternCacheShard[K, V]) rnd(ub uint32) uint32 { | |
| r, _ := bits.Mul64(ics.rndstate, uint64(ub)+1) | |
| ics.rndstate *= 0xda942042e4dd58b5 | |
| return uint32(r >> 32) | |
| } | |
| func (ics *InternCacheShard[K, V]) getLocal(k K) (V, bool) { | |
| v, ok := ics.m[k] | |
| return v, ok | |
| } | |
| func (ics *InternCacheShard[K, V]) Set(k K, v V) { | |
| ics.m[k] = v | |
| } | |
| func (ics *InternCacheShard[K, V]) GetOrSet(k K, f func(K) V) V { | |
| if v, ok := ics.Get(k); ok { | |
| return v | |
| } | |
| v := f(k) | |
| ics.Set(k, v) | |
| return v | |
| } | |
| func (ics *InternCacheShard[K, V]) Merge() { | |
| nm := make(map[K]V, ics.ic.maxSize) | |
| for { | |
| gm := ics.ic.loadMap() | |
| for k, v := range limit(mergeMaps(ics.m, gm), ics.ic.maxSize) { | |
| nm[k] = v | |
| } | |
| if ics.ic.casMap(gm, nm) { | |
| return | |
| } | |
| clear(nm) | |
| } | |
| } | |
| func mergeMaps[K comparable, V any](a, b map[K]V) iter.Seq2[K, V] { | |
| // Trivial cases: one or both of the maps are empty | |
| switch { | |
| case len(a) == 0 && len(b) == 0: | |
| return iter.Seq2[K, V](func(_ func(K, V) bool) {}) | |
| case len(a) == 0: | |
| return maps.All(b) | |
| case len(b) == 0: | |
| return maps.All(a) | |
| } | |
| return iter.Seq2[K, V](func(yield func(K, V) bool) { | |
| // First, return the common elements | |
| s, l := a, b | |
| if len(b) < len(a) { | |
| s, l = b, a | |
| } | |
| for k, v := range s { | |
| if _, ok := l[k]; !ok { | |
| continue | |
| } | |
| if !yield(k, v) { | |
| return | |
| } | |
| } | |
| // Second, return the non-common elements, interleaved | |
| na, sa := iter.Pull2(maps.All(a)) | |
| defer sa() | |
| nb, sb := iter.Pull2(maps.All(b)) | |
| defer sb() | |
| for { | |
| ra: | |
| ka, va, oa := na() | |
| if oa { | |
| if _, ok := b[ka]; ok { | |
| goto ra | |
| } | |
| if !yield(ka, va) { | |
| return | |
| } | |
| } | |
| rb: | |
| kb, vb, ob := nb() | |
| if ob { | |
| if _, ok := a[kb]; ok { | |
| goto rb | |
| } | |
| if !yield(kb, vb) { | |
| return | |
| } | |
| } | |
| if !oa && !ob { | |
| return | |
| } | |
| } | |
| }) | |
| } | |
| func limit[K, V any](i iter.Seq2[K, V], n int) iter.Seq2[K, V] { | |
| if n < 0 { | |
| panic("negative n") | |
| } else if n == 0 { | |
| return iter.Seq2[K, V](func(_ func(K, V) bool) {}) | |
| } | |
| return iter.Seq2[K, V](func(yield func(K, V) bool) { | |
| for k, v := range i { | |
| if !yield(k, v) || n <= 1 { | |
| return | |
| } | |
| n-- | |
| } | |
| }) | |
| } | |
| func main() { | |
| ic := NewInternCache[string, string](256) | |
| ic.Shard().Get("hello") | |
| } | |
| type fastmap struct { | |
| table []string | |
| lt int | |
| n int | |
| m uint64 | |
| s uint64 | |
| } | |
| func initFastmap(n int) *fastmap { | |
| if n < 0 { | |
| panic("invalid n") | |
| } | |
| return &fastmap{ | |
| table: make([]string, 1<<n), | |
| n: n, | |
| m: (1<<n)-1, | |
| s: rand.Uint64(), | |
| lt: math.MaxInt, | |
| } | |
| } | |
| const byteStrings = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" | |
| const ( | |
| p0 = 0xe8d92994a71175cb | |
| p1 = 0xc2d05d08d8be5e97 | |
| p2 = 0xf5a052622d79b86b | |
| p3 = 0xe0b968bc0bf9f111 | |
| p4 = 0xb93b4d54530b4f15 | |
| p5 = 0xc086bc7dc1486afb | |
| c1 = 0xed7b0c7a286f6811 | |
| c3 = 0xb453bbc564645955 | |
| c5 = 0x8ebf1c62c27e2453 | |
| ) | |
| func (f *fastmap) get(s string) string { | |
| var h uint64 | |
| switch len(s) { | |
| case 0: | |
| return "" | |
| case 1: | |
| idx := s[0] | |
| return byteStrings[idx:idx+1] | |
| case 2: | |
| h = f.hash(2, uint64(binary.NativeEndian.Uint16([]byte(s[0:1]))), 0) | |
| case 3: | |
| h = f.hash(3, (uint64(binary.NativeEndian.Uint16([]byte(s[0:1])))<<8) | uint64(s[2]), 0) | |
| case 4: | |
| h = f.hash(4, uint64(binary.NativeEndian.Uint32([]byte(s[0:3]))), 0) | |
| case 5: | |
| h = f.hash(5, (uint64(binary.NativeEndian.Uint32([]byte(s[0:3])))<<8) | uint64(s[4]), 0) | |
| case 6: | |
| h = f.hash(6, (uint64(binary.NativeEndian.Uint32([]byte(s[0:3])))<<32) | uint64(binary.NativeEndian.Uint32([]byte(s[1:4]))), 0) | |
| case 7: | |
| h = f.hash(7, (uint64(binary.NativeEndian.Uint32([]byte(s[0:3])))<<32) | uint64(binary.NativeEndian.Uint32([]byte(s[2:5]))), 0) | |
| case 8: | |
| h = f.hash(8, binary.NativeEndian.Uint64([]byte(s[0:7])), 0) | |
| default: | |
| h = f.hash(len(s), binary.NativeEndian.Uint64([]byte(s[0:7])), binary.NativeEndian.Uint64([]byte(s[len(s)-8:len(s)-1]))) | |
| } | |
| idx := int(h & f.m) | |
| cs := f.table[idx] | |
| if cs != s { | |
| return "" | |
| } | |
| return cs | |
| } | |
| func (f *fastmap) hash(_l int, a uint64, b uint64) uint64 { | |
| l := uint64(_l) | |
| h0 := (l ^ f.s) * p0 | |
| h1 := bsw(l + c1) * p1 | |
| h2 := (a + f.s) * p2 | |
| h3 := bsw(a ^ c3) * p3 | |
| h4 := bsw(b ^ f.s) * p4 | |
| h5 := (b + c5) * p5 | |
| return ((h0 + h4) ^ h2) - ((h1 + h5) ^ h3) | |
| } | |
| func (f *fastmap) getbytes(s []byte) string { | |
| var h uint64 | |
| switch len(s) { | |
| case 0: | |
| return "" | |
| case 1: | |
| idx := s[0] | |
| return byteStrings[idx:idx+1] | |
| case 2: | |
| h = f.hash(2, uint64(binary.NativeEndian.Uint16(s[0:1])), 0) | |
| case 3: | |
| h = f.hash(3, (uint64(binary.NativeEndian.Uint16(s[0:1]))<<8) | uint64(s[2]), 0) | |
| case 4: | |
| h = f.hash(4, uint64(binary.NativeEndian.Uint32(s[0:3])), 0) | |
| case 5: | |
| h = f.hash(5, (uint64(binary.NativeEndian.Uint32(s[0:3]))<<8) | uint64(s[4]), 0) | |
| case 6: | |
| h = f.hash(6, (uint64(binary.NativeEndian.Uint32(s[0:3]))<<32) | uint64(binary.NativeEndian.Uint32(s[1:4])), 0) | |
| case 7: | |
| h = f.hash(7, (uint64(binary.NativeEndian.Uint32(s[0:3]))<<32) | uint64(binary.NativeEndian.Uint32(s[2:5])), 0) | |
| case 8: | |
| h = f.hash(8, binary.NativeEndian.Uint64(s[0:7]), 0) | |
| default: | |
| h = f.hash(len(s), binary.NativeEndian.Uint64(s[0:7]), binary.NativeEndian.Uint64(s[len(s)-8:len(s)-1])) | |
| } | |
| idx := int(h & f.m) | |
| cs := f.table[idx] | |
| if cs != string(s) { | |
| return "" | |
| } | |
| return cs | |
| } | |
| func bsw(n uint64) uint64 { | |
| return bits.ReverseBytes64(n) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment