Last active
November 19, 2025 14:40
-
-
Save cherts/042fcabee622c1cb83d61308b160ebec to your computer and use it in GitHub Desktop.
Generating test data for Clickhouse
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Create ReplicatedMergeTree table | |
| CREATE TABLE mydb.my_test_data ON CLUSTER '{cluster}' | |
| ( | |
| event_id UUID, | |
| user_id UInt32, | |
| event_time DateTime, | |
| event_type Enum('click' = 1, 'view' = 2, 'purchase' = 3), | |
| amount Decimal(18, 2) | |
| ) | |
| ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') | |
| PARTITION BY toYYYYMM(event_time) | |
| ORDER BY event_time; | |
| -- Create Distributed table | |
| CREATE TABLE mydb.my_test_data_distributed ON CLUSTER '{cluster}' AS mydb.my_test_data | |
| ENGINE = Distributed('{cluster}', mydb, my_test_data, rand()) | |
| -- Create mat view | |
| CREATE MATERIALIZED VIEW IF NOT EXISTS mydb.my_test_data_mv ON CLUSTER '{cluster}' TO mydb.my_test_data_distributed AS | |
| SELECT event_id, user_id, event_time, amount FROM mydb.my_test_data_distributed WHERE event_type='view'; | |
| -- Insert random data | |
| INSERT INTO mydb.my_test_data_distributed | |
| SELECT | |
| generateUUIDv4() AS event_id, | |
| rand() % 10000 AS user_id, | |
| (toDateTime('2025-12-31 00:00:00') - toIntervalHour(randNormal(0, 3))) - toIntervalDay(1 + rand() % 364) AS event_time, | |
| arrayElement(['click', 'view', 'purchase'], 1 + rand() % 3) AS event_type, | |
| toDecimal32(randUniform(1, 1000),2) AS amount | |
| FROM numbers(100000000); | |
| -- Test data | |
| SELECT count() FROM mydb.my_test_data_distributed; | |
| SELECT event_type, count() FROM mydb.my_test_data_distributed GROUP BY event_type; | |
| SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='purchase' AND event_time<'2025-05-01 00:00:00'; | |
| SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='view' AND event_time<'2025-05-01 00:00:00'; | |
| SELECT count() FROM mydb.my_test_data_distributed WHERE event_type='click' AND event_time<'2025-05-01 00:00:00'; | |
| -- Remove table | |
| DROP VIEW mydb.my_test_data_mv ON CLUSTER '{cluster}'; | |
| DROP TABLE mydb.my_test_data_distributed ON CLUSTER '{cluster}'; | |
| DROP TABLE mydb.my_test_data ON CLUSTER '{cluster}'; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment