Skip to content

Instantly share code, notes, and snippets.

@Anexen
Created July 24, 2024 07:06
Show Gist options
  • Select an option

  • Save Anexen/0b63f634690330ca0e637783a28febd2 to your computer and use it in GitHub Desktop.

Select an option

Save Anexen/0b63f634690330ca0e637783a28febd2 to your computer and use it in GitHub Desktop.
Bloom filter (numpy)
import numpy as np
from cityhash import CityHash64WithSeed
H = np.vectorize(CityHash64WithSeed, otypes=['uint64'])
class BloomFilter:
def __init__(self, n: int, fp_prob: float) -> None:
self.m = np.ceil(-(n * np.log(fp_prob)) / (np.log(2) ** 2)).astype(int)
self.k = np.round((self.m / n) * np.log(2)).astype(int)
self.bloom = np.zeros(self.m, dtype=bool)
def add(self, value: bytes) -> None:
self.bloom[self._get_indices(value)] = 1
def __contains__(self, value: bytes) -> bool:
return not np.any(self.bloom[self._get_indices(value)] == 0)
def _get_indices(self, value) -> np.ndarray[int]:
return H(value, np.arange(self.k)) % self.m
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment