Skip to content

Instantly share code, notes, and snippets.

@Lambdanaut
Last active August 25, 2016 02:24
Show Gist options
  • Select an option

  • Save Lambdanaut/ac65a3a6cf5bf5105b27f7084ca6b3bb to your computer and use it in GitHub Desktop.

Select an option

Save Lambdanaut/ac65a3a6cf5bf5105b27f7084ca6b3bb to your computer and use it in GitHub Desktop.
Space-efficient Bloom filter in Python
"""
EXAMPLE
>> bloom = Bloom(1000000, 6) # Instantiate a Bloom filter of size 100,000 with 6 runs through the hash function
>> [bloom.insert(str(x)) for x in range(10000)] # Insert the integers from 0-9999 into the Bloom Filter
>> all([bloom.lookup(str(x)) for x in range(10000)]) # Ensure all values were entered correctly
True
"""
from bitarray import bitarray
import mmh3
class Bloom(object):
def __init__(self, size, hash_count, hash_f=mmh3.hash):
self.size = size
self.hash_count = hash_count
self.hash_f = hash_f
self.ba = bitarray(size)
self.ba.setall(0)
def insert(self, value):
for seed in range(self.hash_count):
hashed = self.hash_f(value, seed) % self.size
self.ba[hashed] = 1
def lookup(self, value):
for seed in range(self.hash_count):
hashed = self.hash_f(value, seed) % self.size
if not self.ba[hashed]:
return False
return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment