Created
August 31, 2016 08:21
-
-
Save boatgm/6979be49d0cb94716bddd9f79ec97875 to your computer and use it in GitHub Desktop.
BloomFilter.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { h32 as xxhash } from 'xxhashjs'; | |
| import crypto from 'crypto'; | |
| const LN2_SQUARED = Math.LN2 * Math.LN2; | |
| export default class BloomFilter { | |
| constructor(options = {}){ | |
| this.init(options); | |
| } | |
| init(options){ | |
| if(options.seeds){ | |
| this.seeds = options.seeds; | |
| this.hashes = options.seeds.length; | |
| } else { | |
| this.seeds = []; | |
| this.hashes = options.hashes || 0; | |
| this._generateSeeds(); | |
| } | |
| this.bits = options.bits || 1024; | |
| this.buffer = Buffer.alloc(Math.ceil(this.bits / 8)); | |
| this.clear(); | |
| } | |
| static optimize(itemCount, errorRate = 0.005){ | |
| let bits = Math.round(-1 * itemCount * Math.log(errorRate) / LN2_SQUARED); | |
| let hashes = Math.round((bits / itemCount) * Math.LN2); | |
| return { | |
| bits, | |
| hashes | |
| }; | |
| } | |
| static createOptimal(itemCount, errorRate){ | |
| let opts = this.optimize(itemCount, errorRate); | |
| return new this(opts); | |
| } | |
| clear(){ | |
| this.buffer.fill(0); | |
| } | |
| _generateSeeds(){ | |
| if(!this.seeds) this.seeds = []; | |
| for(let i = 0; i < this.hashes; ++i){ | |
| let buf = crypto.randomBytes(4); | |
| this.seeds[i] = buf.readUInt32LE(0); | |
| for(let j = 0; j < i; ++j){ | |
| if(this.seeds[i] === this.seeds[j]){ | |
| --i; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| add(buf) { | |
| if(Array.isArray(buf)){ | |
| for(let item of buf){ | |
| this.add(item); | |
| } | |
| } else { | |
| buf = Buffer.from(buf); | |
| for(let i = 0; i < this.hashes; ++i){ | |
| let hash = xxhash(buf, this.seeds[i]).toString(); | |
| let bit = hash % this.bits; | |
| this._setBit(bit); | |
| } | |
| } | |
| } | |
| has(item){ | |
| item = Buffer.from(item); | |
| for(let i = 0; i < this.hashes; ++i){ | |
| let hash = xxhash(item, this.seeds[i]).toString(); | |
| let bit = hash % this.bits; | |
| let isInSet = this._getBit(bit); | |
| if(!isInSet) return false; | |
| } | |
| return true; | |
| } | |
| _setBit(bit){ | |
| let pos = Math.floor(bit / 8); | |
| let shift = bit % 8; | |
| let bitField = this.buffer[pos]; | |
| bitField |= (0x1 << shift); | |
| this.buffer[pos] = bitField; | |
| } | |
| _getBit(bit){ | |
| let pos = Math.floor(bit / 8); | |
| let shift = bit % 8; | |
| let bitField = this.buffer[pos]; | |
| return (bitField & (0x1 << shift)) !== 0; | |
| } | |
| } | |
| var filter = new BloomFilter({ hashes: 8, bits: 1024 }); | |
| filter.add(['cat', 'dog', 'coati', 'red panda']); | |
| console.log(filter.has('cat')); | |
| console.log(filter.has('coat')); | |
| console.log(filter.has('null')); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment