Created
January 25, 2026 09:31
-
-
Save yongkangc/b2c749c18a1907ea77187788bf01a507 to your computer and use it in GitHub Desktop.
RocksDB block size benchmark for reth
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standalone benchmark crate: measures RocksDB point-read / range-scan /
# write latency and throughput across different data-block sizes (for reth).
[package]
name = "rocksdb-blocksize-bench"
version = "0.1.0"
edition = "2021"

[dependencies]
rocksdb = "0.22"                                    # storage engine under test
clap = { version = "4", features = ["derive"] }     # CLI argument parsing
hdrhistogram = "7"                                  # latency percentile histograms
serde = { version = "1", features = ["derive"] }    # result struct serialization
serde_json = "1"                                    # JSON output of results
rand = "0.8"                                        # seeded random key selection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use clap::{Parser, ValueEnum}; | |
| use hdrhistogram::Histogram; | |
| use rand::{Rng, SeedableRng}; | |
| use rand::rngs::StdRng; | |
| use rocksdb::{DB, Options, BlockBasedOptions, WriteBatch, IteratorMode}; | |
| use serde::Serialize; | |
| use std::path::PathBuf; | |
| use std::time::Instant; | |
// Command-line arguments for the benchmark.
// NB: plain `//` comments are used deliberately — `///` doc comments on clap
// derive fields become --help text and would change CLI output.
#[derive(Parser)]
#[command(name = "rocksdb-blocksize-bench")]
#[command(about = "Benchmark RocksDB with different block sizes")]
struct Args {
    // RocksDB data-block size in bytes (default 4 KiB).
    #[arg(long, default_value = "4096")]
    block_size: u64,
    // Which workload to run (point-read / range-scan / write).
    #[arg(long, value_enum)]
    op: Operation,
    // Number of keys/operations; for read workloads this is also the
    // size of the dataset populated on first run.
    #[arg(long, default_value = "100000")]
    num_keys: u64,
    // Filesystem path of the RocksDB database (required).
    #[arg(long)]
    db_path: PathBuf,
    // Keys read per range scan (default 100).
    #[arg(long, default_value = "100")]
    range_scan_keys: u64,
    // Keys per write batch (default 1000).
    #[arg(long, default_value = "1000")]
    batch_size: u64,
}
// Benchmark workload selected via --op.
// (clap's ValueEnum derive maps variants to CLI values — presumably
// kebab-case, e.g. `point-read`; confirm against clap's rename rules.)
#[derive(Clone, ValueEnum)]
enum Operation {
    // Random single-key gets over the populated keyspace.
    PointRead,
    // Forward iterator scans of `range_scan_keys` keys each.
    RangeScan,
    // Batched puts timed per-batch commit.
    Write,
}
// JSON-serializable summary of one benchmark run, printed to stdout by main.
#[derive(Serialize)]
struct BenchResult {
    // Data-block size (bytes) the DB was opened with.
    block_size: u64,
    // Workload label: "point_read", "range_scan", or "write".
    operation: String,
    // The --num-keys argument echoed back for context.
    num_keys: u64,
    // Operations per second over the whole benchmark phase wall-clock.
    ops_per_sec: f64,
    // Latency percentiles in microseconds (from the HDR histogram).
    latency_p50_us: u64,
    latency_p99_us: u64,
    latency_p999_us: u64,
    // Mean recorded latency in microseconds.
    latency_mean_us: f64,
    // Block-cache hit rate in [0,1], if counters could be parsed.
    block_cache_hit_rate: Option<f64>,
    // Total wall-clock time of the benchmark phase in seconds.
    total_time_secs: f64,
}
| fn create_b256_key(index: u64) -> [u8; 32] { | |
| let mut key = [0u8; 32]; | |
| key[24..32].copy_from_slice(&index.to_be_bytes()); | |
| let hash = simple_hash(index); | |
| key[0..8].copy_from_slice(&hash.to_be_bytes()); | |
| key | |
| } | |
/// Cheap 64-bit mixer: two rounds of odd-constant multiply + 33-bit
/// xor-shift. Every step is a bijection on u64, so distinct inputs never
/// collide. Not cryptographic — only used to scatter benchmark keys.
fn simple_hash(x: u64) -> u64 {
    let r1 = x.wrapping_mul(0x517cc1b727220a95);
    let r1 = r1 ^ (r1 >> 33);
    let r2 = r1.wrapping_mul(0x9e3779b97f4a7c15);
    r2 ^ (r2 >> 33)
}
/// Returns a `size`-byte value filled with the constant byte 0xAB.
fn create_value(size: usize) -> Vec<u8> {
    std::iter::repeat(0xABu8).take(size).collect()
}
| fn open_db(path: &PathBuf, block_size: u64, create: bool) -> DB { | |
| let mut block_opts = BlockBasedOptions::default(); | |
| block_opts.set_block_size(block_size as usize); | |
| block_opts.set_cache_index_and_filter_blocks(true); | |
| block_opts.set_bloom_filter(10.0, false); | |
| let mut opts = Options::default(); | |
| opts.create_if_missing(create); | |
| opts.set_block_based_table_factory(&block_opts); | |
| opts.enable_statistics(); | |
| opts.set_compression_type(rocksdb::DBCompressionType::Lz4); | |
| DB::open(&opts, path).expect("Failed to open RocksDB") | |
| } | |
| fn populate_db(db: &DB, num_keys: u64, batch_size: u64) { | |
| println!("Populating DB with {} keys...", num_keys); | |
| let value = create_value(64); | |
| let mut batch = WriteBatch::default(); | |
| for i in 0..num_keys { | |
| let key = create_b256_key(i); | |
| batch.put(key, &value); | |
| if (i + 1) % batch_size == 0 { | |
| db.write(batch).expect("Write failed"); | |
| batch = WriteBatch::default(); | |
| } | |
| } | |
| if !batch.is_empty() { | |
| db.write(batch).expect("Write failed"); | |
| } | |
| db.flush().expect("Flush failed"); | |
| println!("Population complete."); | |
| } | |
| fn bench_point_read(db: &DB, num_keys: u64, existing_keys: u64) -> (Histogram<u64>, u64) { | |
| let mut histogram = Histogram::<u64>::new(3).unwrap(); | |
| let mut rng = StdRng::seed_from_u64(42); | |
| let mut hits = 0u64; | |
| for _ in 0..num_keys { | |
| let idx = rng.gen_range(0..existing_keys); | |
| let key = create_b256_key(idx); | |
| let start = Instant::now(); | |
| if db.get(&key).expect("Get failed").is_some() { | |
| hits += 1; | |
| } | |
| let elapsed = start.elapsed().as_micros() as u64; | |
| histogram.record(elapsed.max(1)).ok(); | |
| } | |
| (histogram, hits) | |
| } | |
| fn bench_range_scan(db: &DB, num_scans: u64, keys_per_scan: u64, existing_keys: u64) -> Histogram<u64> { | |
| let mut histogram = Histogram::<u64>::new(3).unwrap(); | |
| let mut rng = StdRng::seed_from_u64(42); | |
| for _ in 0..num_scans { | |
| let start_idx = rng.gen_range(0..existing_keys.saturating_sub(keys_per_scan)); | |
| let start_key = create_b256_key(start_idx); | |
| let start = Instant::now(); | |
| let iter = db.iterator(IteratorMode::From(&start_key, rocksdb::Direction::Forward)); | |
| let mut count = 0u64; | |
| for item in iter { | |
| let _ = item.expect("Iterator error"); | |
| count += 1; | |
| if count >= keys_per_scan { | |
| break; | |
| } | |
| } | |
| let elapsed = start.elapsed().as_micros() as u64; | |
| histogram.record(elapsed.max(1)).ok(); | |
| } | |
| histogram | |
| } | |
| fn bench_write(db: &DB, num_batches: u64, batch_size: u64, start_key_offset: u64) -> Histogram<u64> { | |
| let mut histogram = Histogram::<u64>::new(3).unwrap(); | |
| let value = create_value(64); | |
| for batch_idx in 0..num_batches { | |
| let mut batch = WriteBatch::default(); | |
| for i in 0..batch_size { | |
| let key_idx = start_key_offset + batch_idx * batch_size + i; | |
| let key = create_b256_key(key_idx); | |
| batch.put(key, &value); | |
| } | |
| let start = Instant::now(); | |
| db.write(batch).expect("Write failed"); | |
| let elapsed = start.elapsed().as_micros() as u64; | |
| histogram.record(elapsed.max(1)).ok(); | |
| } | |
| histogram | |
| } | |
/// Extracts the block-cache hit rate from a RocksDB statistics dump.
///
/// Scans for `rocksdb.block.cache.hit COUNT ...` and
/// `rocksdb.block.cache.miss COUNT ...` ticker lines (last whitespace-
/// separated token is the counter) and returns hits / (hits + misses).
/// Returns `None` if either counter is missing or both are zero.
fn parse_cache_stats(stats_str: &str) -> Option<f64> {
    // Pull the trailing integer off a ticker line, if it parses.
    fn trailing_count(line: &str) -> Option<u64> {
        line.split_whitespace().last()?.parse().ok()
    }
    let mut hits: Option<u64> = None;
    let mut misses: Option<u64> = None;
    for line in stats_str.lines() {
        if line.contains("rocksdb.block.cache.hit COUNT") {
            if let Some(n) = trailing_count(line) {
                hits = Some(n);
            }
        }
        if line.contains("rocksdb.block.cache.miss COUNT") {
            if let Some(n) = trailing_count(line) {
                misses = Some(n);
            }
        }
    }
    let (h, m) = (hits?, misses?);
    if h + m == 0 {
        return None;
    }
    Some(h as f64 / (h + m) as f64)
}
// Entry point: opens (and, for read workloads, lazily populates) the DB,
// runs the selected benchmark, and prints a JSON BenchResult to stdout.
fn main() {
    let args = Args::parse();
    let db = match args.op {
        // Write benchmark writes its own data; just open/create the DB.
        Operation::Write => open_db(&args.db_path, args.block_size, true),
        _ => {
            // Read benchmarks need existing data. If there is no CURRENT
            // manifest the DB was never created: populate once, then reopen.
            let current_file = args.db_path.join("CURRENT");
            if !current_file.exists() {
                let db = open_db(&args.db_path, args.block_size, true);
                populate_db(&db, args.num_keys, args.batch_size);
                drop(db);
            }
            open_db(&args.db_path, args.block_size, true)
        }
    };
    let start_time = Instant::now();
    // Each arm yields (latency histogram, label, number of timed operations).
    let (histogram, op_name, op_count) = match args.op {
        Operation::PointRead => {
            let (hist, _hits) = bench_point_read(&db, args.num_keys, args.num_keys);
            (hist, "point_read", args.num_keys)
        }
        Operation::RangeScan => {
            // One "op" is a whole scan of range_scan_keys keys.
            // NOTE(review): integer division — num_keys < range_scan_keys
            // yields zero scans and an empty histogram; confirm intended.
            let num_scans = args.num_keys / args.range_scan_keys;
            let hist = bench_range_scan(&db, num_scans, args.range_scan_keys, args.num_keys);
            (hist, "range_scan", num_scans)
        }
        Operation::Write => {
            let num_batches = args.num_keys / args.batch_size;
            let hist = bench_write(&db, num_batches, args.batch_size, 0);
            (hist, "write", num_batches)
        }
    };
    let total_time = start_time.elapsed();
    // NOTE(review): ticker counters ("rocksdb.block.cache.hit COUNT ...")
    // normally come from the statistics object enabled in open_db, not the
    // "rocksdb.stats" property; verify this property actually contains the
    // lines parse_cache_stats expects, otherwise hit rate is always None.
    let stats_str = db.property_value("rocksdb.stats").ok().flatten().unwrap_or_default();
    let cache_hit_rate = parse_cache_stats(&stats_str);
    let result = BenchResult {
        block_size: args.block_size,
        operation: op_name.to_string(),
        num_keys: args.num_keys,
        // Throughput over the wall-clock time of the whole benchmark phase.
        ops_per_sec: op_count as f64 / total_time.as_secs_f64(),
        latency_p50_us: histogram.value_at_quantile(0.50),
        latency_p99_us: histogram.value_at_quantile(0.99),
        latency_p999_us: histogram.value_at_quantile(0.999),
        latency_mean_us: histogram.mean(),
        block_cache_hit_rate: cache_hit_rate,
        total_time_secs: total_time.as_secs_f64(),
    };
    println!("{}", serde_json::to_string_pretty(&result).unwrap());
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment