Skip to content

Instantly share code, notes, and snippets.

@prettyirrelevant
Created January 27, 2026 18:18
Show Gist options
  • Select an option

  • Save prettyirrelevant/a7589d32cf9a22e21ec88c4a6deb9755 to your computer and use it in GitHub Desktop.

Select an option

Save prettyirrelevant/a7589d32cf9a22e21ec88c4a6deb9755 to your computer and use it in GitHub Desktop.
Benchmark: SQLite composite indexes vs single-column indexes for event_metrics table (rotki PR #11464)
#!/usr/bin/env python3
"""Benchmark event_metrics indexes.
Usage:
python script.py setup # Create base DB with data (no indexes)
python script.py index single # Add single-column indexes
python script.py index composite # Add composite indexes
python script.py query single # Query single-indexed DB
python script.py query composite # Query composite-indexed DB
"""
import random
import shutil
import sqlite3
import sys
from pathlib import Path
NUM_ROWS = 500_000
DB_DIR = Path('/tmp/benchmark_indexes')
SCHEMA = """
CREATE TABLE history_events (
identifier INTEGER PRIMARY KEY, sequence_index INTEGER, timestamp INTEGER, location CHAR(1), ignored INTEGER DEFAULT 0
);
CREATE TABLE event_metrics (
id INTEGER PRIMARY KEY, event_identifier INTEGER, location_label TEXT, protocol TEXT, metric_key TEXT, metric_value TEXT, asset TEXT
);
"""
SINGLE_INDEXES = """
CREATE INDEX idx_em_event ON event_metrics(event_identifier);
CREATE INDEX idx_em_metric_key ON event_metrics(metric_key);
CREATE INDEX idx_em_asset ON event_metrics(asset);
"""
COMPOSITE_INDEXES = """
CREATE INDEX idx_em_event ON event_metrics(event_identifier);
CREATE INDEX idx_em_metric_key ON event_metrics(metric_key);
CREATE INDEX idx_em_asset ON event_metrics(asset);
CREATE INDEX idx_em_metric_key_bucket ON event_metrics(metric_key, location_label, protocol, asset);
CREATE INDEX idx_em_metric_key_event ON event_metrics(metric_key, event_identifier);
"""
GET_BALANCES = """
SELECT asset, SUM(metric_value) FROM (
SELECT em.asset, em.metric_value, MAX(he.timestamp + he.sequence_index)
FROM event_metrics em JOIN history_events he ON em.event_identifier = he.identifier
WHERE he.ignored = 0 AND em.metric_key = 'balance' AND he.timestamp <= 1700250000000
GROUP BY he.location, em.location_label, em.protocol, em.asset
) GROUP BY asset HAVING SUM(metric_value) > 0
"""
def setup():
"""Create base database with data only, no indexes."""
DB_DIR.mkdir(exist_ok=True)
base_db = DB_DIR / 'base.db'
base_db.unlink(missing_ok=True)
print(f'Creating base DB with {NUM_ROWS:,} rows...')
conn = sqlite3.connect(base_db)
conn.executescript(SCHEMA)
random.seed(42)
base_ts = 1700000000000
conn.executemany('INSERT INTO history_events VALUES (?,?,?,?,?)',
[(i, i%10, base_ts+i*1000, 'ABCDEFGHIJ'[i%10], i%5==0) for i in range(1, NUM_ROWS+1)])
conn.executemany('INSERT INTO event_metrics VALUES (?,?,?,?,?,?,?)',
[(i, i, f'0x{i%100:040x}', f'proto_{i%20}' if i%5 else None,
'balance', str(random.uniform(-1000,10000)), f'asset_{i%50}') for i in range(1, NUM_ROWS+1)])
conn.commit()
conn.close()
print(f'Created {base_db} ({base_db.stat().st_size / 1024 / 1024:.1f} MB)')
def create_indexed_db(mode: str):
"""Copy base DB and add indexes."""
base_db = DB_DIR / 'base.db'
target_db = DB_DIR / f'{mode}.db'
if not base_db.exists():
print('Run setup first!')
return
shutil.copy(base_db, target_db)
conn = sqlite3.connect(target_db)
indexes = COMPOSITE_INDEXES if mode == 'composite' else SINGLE_INDEXES
conn.executescript(indexes)
conn.execute('ANALYZE')
conn.commit()
conn.close()
print(f'Created {target_db} with {mode} indexes')
def query(mode: str):
"""Run query against pre-indexed database."""
db_path = DB_DIR / f'{mode}.db'
conn = sqlite3.connect(db_path)
conn.execute(GET_BALANCES).fetchall()
conn.close()
def main():
if len(sys.argv) < 2:
print(__doc__)
return
cmd = sys.argv[1]
if cmd == 'setup':
setup()
elif cmd == 'index':
mode = sys.argv[2] if len(sys.argv) > 2 else 'single'
create_indexed_db(mode)
elif cmd == 'query':
mode = sys.argv[2] if len(sys.argv) > 2 else 'single'
query(mode)
else:
print(__doc__)
if __name__ == '__main__':
main()

SQLite Index Benchmark: Composite vs Single-Column

Benchmark comparing composite indexes (from rotki PR #11464) against single-column indexes for the event_metrics table.

Index Configurations

Single-Column Indexes (Baseline)

CREATE INDEX idx_em_event ON event_metrics(event_identifier);
CREATE INDEX idx_em_metric_key ON event_metrics(metric_key);
CREATE INDEX idx_em_asset ON event_metrics(asset);

Composite Indexes (PR #11464)

-- Same as above, plus:
CREATE INDEX idx_em_metric_key_bucket ON event_metrics(metric_key, location_label, protocol, asset);
CREATE INDEX idx_em_metric_key_event ON event_metrics(metric_key, event_identifier);

Query Tested

From rotkehlchen/balances/historical.py get_balances():

SELECT asset, SUM(metric_value) FROM (
    SELECT em.asset, em.metric_value, MAX(he.timestamp + he.sequence_index)
    FROM event_metrics em JOIN history_events he ON em.event_identifier = he.identifier
    WHERE he.ignored = 0 AND em.metric_key = 'balance' AND he.timestamp <= ?
    GROUP BY he.location, em.location_label, em.protocol, em.asset
) GROUP BY asset HAVING SUM(metric_value) > 0

Results

100k Rows

Command Mean [ms] Min [ms] Max [ms]
query single 52.2 ± 0.4 51.5 53.9
query composite 56.1 ± 0.6 55.2 58.8

Summary: Single-column indexes 1.07x faster

500k Rows

Command Mean [ms] Min [ms] Max [ms]
query single 190.9 ± 2.1 186.8 194.3
query composite 212.0 ± 2.7 207.5 218.3

Summary: Single-column indexes 1.11x faster

Query Plan Analysis

Both configurations produce identical query plans:

QUERY PLAN
|--CO-ROUTINE (subquery-1)
|  |--SEARCH em USING INDEX idx_em_metric_key (metric_key=?)
|  |--SEARCH he USING INTEGER PRIMARY KEY (rowid=?)
|  `--USE TEMP B-TREE FOR GROUP BY
|--SCAN (subquery-1)
`--USE TEMP B-TREE FOR GROUP BY

SQLite only uses idx_em_metric_key in both cases. The composite indexes are not utilized.

Conclusion

The composite indexes from PR #11464 add ~10% overhead without query performance benefit for the get_balances query. The GROUP BY spans both tables (history_events.location + event_metrics.*), so the composite index cannot provide a covering index advantage.

Environment

  • SQLite (Python sqlite3 module)
  • macOS
  • Benchmarked with hyperfine
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment