Created
October 23, 2025 03:46
-
-
Save adinsmoor/e677a362ea1ec7e8025950a2fca6f8c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- 1) summary output below across all tables | |
| with dbt_objects as ( | |
| -- get list of tables maintained by dbt | |
| SELECT | |
| t.source_table_full_name, | |
| count(distinct h.statement_id) as dbt_query_count | |
| FROM system.query.history AS h | |
| LEFT JOIN system.access.table_lineage AS t | |
| ON t.statement_id = h.cache_origin_statement_id | |
| WHERE | |
| -- h.client_application = 'Databricks Dbt' | |
| -- AND | |
| t.event_time > CURRENT_TIMESTAMP() - INTERVAL 3 DAYS | |
| and source_table_full_name = 'sa_dev.tpch.supplier' | |
| -- AND statement_type IN ('SELECT','CREATE','REPLACE','UPDATE','MERGE','INSERT') | |
| GROUP BY 1 | |
| -- ORDER BY 2 DESC | |
| -- LIMIT 100 | |
| ), | |
| all_objects as ( | |
| SELECT | |
| source_table_full_name, | |
| COUNT(distinct event_id) AS all_queries | |
| FROM | |
| system.access.table_lineage t | |
| WHERE | |
| t.event_date > CURRENT_DATE() - INTERVAL 3 DAYS | |
| AND t.source_table_full_name IS NOT NULL | |
| GROUP BY source_table_full_name | |
| -- ORDER BY 2 DESC | |
| -- LIMIT 100 | |
| ) | |
| SELECT | |
| do.source_table_full_name, | |
| ao.all_queries, | |
| do.dbt_query_count, | |
| (ao.all_queries - do.dbt_query_count) as non_dbt_queries | |
| FROM dbt_objects as do | |
| LEFT JOIN all_objects as ao on do.source_table_full_name = ao.source_table_full_name | |
| ORDER BY 4 ASC | |
| ; | |
| -- 2) check single table as example | |
| -- lineage results | |
| SELECT * | |
| FROM system.access.table_lineage t | |
| WHERE t.event_date > CURRENT_DATE() - INTERVAL 3 DAYS | |
| and source_table_full_name = 'sa_dev.tpch.supplier' | |
| ; | |
| -- history results | |
| SELECT | |
| t.source_table_full_name, | |
| statement_type, | |
| client_application, | |
| count(distinct h.statement_id) as dbt_query_count | |
| FROM system.query.history AS h | |
| LEFT JOIN system.access.table_lineage AS t | |
| ON t.statement_id = h.cache_origin_statement_id | |
| WHERE | |
| h.client_application = 'Databricks Dbt' | |
| AND t.event_time > CURRENT_TIMESTAMP() - INTERVAL 3 DAYS | |
| and source_table_full_name = 'sa_dev.tpch.supplier' | |
| GROUP BY 1,2,3 | |
| ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment