Skip to content

Instantly share code, notes, and snippets.

@cherts
Last active November 19, 2025 14:40
Show Gist options
  • Select an option

  • Save cherts/042fcabee622c1cb83d61308b160ebec to your computer and use it in GitHub Desktop.

Select an option

Save cherts/042fcabee622c1cb83d61308b160ebec to your computer and use it in GitHub Desktop.
Generating test data for Clickhouse
-- Create ReplicatedMergeTree table
CREATE TABLE mydb.my_test_data ON CLUSTER '{cluster}'
(
event_id UUID,
user_id UInt32,
event_time DateTime,
event_type Enum('click' = 1, 'view' = 2, 'purchase' = 3),
amount Decimal(18, 2)
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
PARTITION BY toYYYYMM(event_time)
ORDER BY event_time;
-- Create Distributed table
CREATE TABLE mydb.my_test_data_distributed ON CLUSTER '{cluster}' AS mydb.my_test_data
ENGINE = Distributed('{cluster}', mydb, my_test_data, rand())
-- Create mat view
CREATE MATERIALIZED VIEW IF NOT EXISTS mydb.my_test_data_mv ON CLUSTER '{cluster}' TO mydb.my_test_data_distributed AS
SELECT event_id, user_id, event_time, amount FROM mydb.my_test_data_distributed WHERE event_type='view';
-- Insert random data
INSERT INTO mydb.my_test_data_distributed
SELECT
generateUUIDv4() AS event_id,
rand() % 10000 AS user_id,
(toDateTime('2025-12-31 00:00:00') - toIntervalHour(randNormal(0, 3))) - toIntervalDay(1 + rand() % 364) AS event_time,
arrayElement(['click', 'view', 'purchase'], 1 + rand() % 3) AS event_type,
toDecimal32(randUniform(1, 1000),2) AS amount
FROM numbers(100000000);
-- Test data
SELECT count() FROM mydb.my_test_data_distributed;
SELECT event_type, count() FROM mydb.my_test_data_distributed GROUP BY event_type;
SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='purchase' AND event_time<'2025-05-01 00:00:00';
SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='view' AND event_time<'2025-05-01 00:00:00';
SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='click' AND event_time<'2025-05-01 00:00:00';
-- Remove table
DROP VIEW mydb.my_test_data_mv ON CLUSTER '{cluster}';
DROP TABLE mydb.my_test_data_distributed ON CLUSTER '{cluster}';
DROP TABLE mydb.my_test_data ON CLUSTER '{cluster}';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment