Skip to content

Instantly share code, notes, and snippets.

@jster1357
Last active June 26, 2025 15:25
Show Gist options
  • Select an option

  • Save jster1357/3e309c1a87a0a16f9925ca17a00d969e to your computer and use it in GitHub Desktop.

Select an option

Save jster1357/3e309c1a87a0a16f9925ca17a00d969e to your computer and use it in GitHub Desktop.
TPCH SQL for BigQuery
SELECT l_returnflag, l_linestatus, SUM(l_quantity) AS SUM_qty, SUM(l_extENDedprice) AS SUM_base_price, SUM(l_extENDedprice * (1-l_discount)) AS SUM_disc_price, SUM(l_extENDedprice * (1-l_discount) * (1 + l_tax)) AS SUM_charge, avg(l_quantity) AS avg_qty, avg(l_extENDedprice) AS avg_price, avg(l_discount) AS avg_disc, count(*) AS count_order FROM tpch_optimized.lineitem WHERE l_shipdate <= DATE_ADD(DATE "1996-12-01", INTERVAL 60 DAY) GROUP BY l_returnflag, l_linestatus ORDER BY l_returnflag, l_linestatus;
SELECT s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment FROM tpch_optimized.part, tpch_optimized.supplier, tpch_optimized.partsupp, tpch_optimized.nation, tpch_optimized.region WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey AND p_size > 15 AND p_type like '%BRASS' AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'EUROPE' AND ps_supplycost = ( SELECT MIN(ps_supplycost) FROM tpch_optimized.partsupp, tpch_optimized.supplier, tpch_optimized.nation, tpch_optimized.region WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'EUROPE' ) ORDER BY s_acctbal DESC, n_name, s_name, p_partkey;
SELECT l_orderkey, SUM(l_extENDedprice * (1-l_discount)) AS revenue, o_orderdate, o_shippriority FROM tpch_optimized.customer, tpch_optimized.orders, tpch_optimized.lineitem WHERE c_mktsegment = 'MACHINERY' AND c_custkey = o_custkey AND l_orderkey = o_orderkey AND o_orderdate < date '1999-03-21' AND l_shipdate > date '1992-03-21' GROUP BY l_orderkey, o_orderdate, o_shippriority ORDER BY revenue DESC, o_orderdate;
SELECT o_orderpriority, count(*) AS order_count FROM tpch_optimized.orders WHERE o_orderdate >= date '1996-03-01' AND o_orderdate < DATE_ADD(DATE "1996-03-01", INTERVAL 3 MONTH) AND exists ( SELECT * FROM tpch_optimized.lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate )GROUP BY o_orderpriority ORDER BY o_orderpriority;
SELECT n_name, SUM(l_extENDedprice * (1-l_discount)) AS revenue FROM tpch_optimized.customer, tpch_optimized.orders, tpch_optimized.lineitem, tpch_optimized.supplier, tpch_optimized.nation, tpch_optimized.region WHERE c_custkey = o_custkey AND l_orderkey = o_orderkey AND l_suppkey = s_suppkey AND c_nationkey = s_nationkey AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'EUROPE' AND o_orderdate >= date '1997-01-01' AND o_orderdate < DATE_ADD(DATE "1997-01-01", INTERVAL 1 YEAR) GROUP BY n_name ORDER BY revenue DESC;
SELECT SUM(l_extENDedprice * l_discount) AS revenue FROM tpch_optimized.lineitem WHERE l_shipdate >= date '1997-01-01' AND l_shipdate < DATE_ADD(DATE "1997-01-01", INTERVAL 1 YEAR) AND l_discount between 0.07-0.01 AND 0.07 + 0.01 AND l_quantity < 24;
SELECT supp_nation, cust_nation, l_year, SUM(volume) AS revenue FROM (SELECT n1.n_name AS supp_nation, n2.n_name AS cust_nation, extract(year FROM l_shipdate) AS l_year, l_extENDedprice * (1-l_discount) AS volume FROM tpch_optimized.supplier, tpch_optimized.lineitem, tpch_optimized.orders, tpch_optimized.customer, tpch_optimized.nation n1, tpch_optimized.nation n2 WHERE s_suppkey = l_suppkey AND o_orderkey = l_orderkey AND c_custkey = o_custkey AND s_nationkey = n1.n_nationkey AND c_nationkey = n2.n_nationkey AND ((n1.n_name = 'PERU' AND n2.n_name = 'IRAQ') or ( n1.n_name = 'IRAQ' AND n2.n_name = 'PERU' ) AND l_shipdate between date '1995-01-01' AND date '1996-12-31' )) AS shipping GROUP BY supp_nation, cust_nation, l_year ORDER BY supp_nation, cust_nation, l_year;
SELECT o_year, SUM( CASE WHEN nation = 'IRAQ' then volume else 0 END) / SUM(volume) AS mkt_share FROM (SELECT extract(year FROM o_orderdate) AS o_year, l_extENDedprice * (1-l_discount) AS volume, n2.n_name AS nation FROM tpch_optimized.part, tpch_optimized.supplier, tpch_optimized.lineitem, tpch_optimized.orders, tpch_optimized.customer, tpch_optimized.nation n1, tpch_optimized.nation n2, tpch_optimized.region WHERE l_orderkey = o_orderkey AND p_partkey = l_partkey AND s_suppkey = l_suppkey AND o_custkey = c_custkey AND c_nationkey = n1.n_nationkey AND n1.n_regionkey = r_regionkey AND r_name = 'MIDDLE EAST' AND s_nationkey = n2.n_nationkey AND o_orderdate between date '1995-01-01' AND date '1996-12-31' AND p_type = 'STANDARD ANODIZED BRASS' )as all_nations GROUP BY o_year ORDER BY o_year;
SELECT nation, o_year, SUM(amount) AS SUM_profit FROM( SELECT n_name AS nation, extract(year FROM o_orderdate) AS o_year, l_extENDedprice * (1-l_discount)-ps_supplycost * l_quantity AS amount FROM tpch_optimized.part, tpch_optimized.supplier, tpch_optimized.lineitem, tpch_optimized.partsupp, tpch_optimized.orders, tpch_optimized.nation WHERE s_suppkey = l_suppkey AND ps_partkey = l_partkey AND ps_suppkey = l_suppkey AND p_partkey = l_partkey AND o_orderkey = l_orderkey AND s_nationkey = n_nationkey AND p_name like '%antique%')as profit GROUP BY nation, o_year ORDER BY nation, o_year DESC;
WITH revenue AS ( SELECT l_suppkey AS supplier_no, SUM(l_extENDedprice * (1-l_discount)) AS total_revenue FROM tpch_optimized.lineitem WHERE l_shipdate >= date '1995-06-01' AND l_shipdate < DATE_ADD(DATE "1995-06-01", INTERVAL 3 MONTH) GROUP BY l_suppkey)SELECT s_suppkey, s_name, s_address, s_phone, total_revenue FROM tpch_optimized.supplier, revenue WHERE s_suppkey = supplier_no AND total_revenue = ( SELECT MAX(total_revenue) FROM revenue ORDER BY s_suppkey);
SELECT c_custkey, c_name, SUM(l_extENDedprice*(1-l_discount)) AS revenue, c_acctbal, n_name, c_address, c_phone, c_comment FROM ( SELECT c_custkey, c_name, n_name, c_acctbal, c_address, c_phone, c_comment, l_extENDedprice, l_discount FROM ( SELECT c_custkey, c_name, c_acctbal, c_address, c_phone, c_comment, c_nationkey, l_extENDedprice, l_discount FROM ( SELECT c_custkey, c_name, c_acctbal, c_address, c_phone, c_comment, c_nationkey, o_orderkey FROM tpch_optimized.customer AS T1 JOIN tpch_optimized.orders AS T2 ON T1.c_custkey = T2.o_custkey WHERE O_OrderDate >= '1993-01-01' AND O_orderdate < DATE_ADD(DATE "1993-01-01", INTERVAL 3 MONTH) ) AS T3 JOIN tpch_optimized.lineitem AS T4 ON T3.o_orderkey = T4.l_orderkey WHERE T4.l_returnflag = 'R' ) AS T5 JOIN tpch_optimized.nation AS T6 ON T5.c_nationkey = T6.n_nationkey ) AS T7 GROUP BY c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment ORDER BY revenue DESC;
SELECT ps_partkey, SUM(ps_supplycost * ps_availqty) AS value FROM tpch_optimized.partsupp, tpch_optimized.supplier, tpch_optimized.nation WHERE ps_suppkey = s_suppkey AND s_nationkey = n_nationkey AND n_name = 'PERU' GROUP BY ps_partkey having SUM(ps_supplycost * ps_availqty) > ( SELECT SUM(ps_supplycost * ps_availqty) * 0.0001000000 FROM tpch_optimized.partsupp, tpch_optimized.supplier, tpch_optimized.nation WHERE ps_suppkey = s_suppkey AND s_nationkey = n_nationkey AND n_name = 'PERU' ) ORDER BY value DESC;
SELECT l_shipmode, SUM( CASE WHEN o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 END) AS high_line_count, SUM( CASE WHEN o_orderpriority <> '1-URGENT' AND o_orderpriority <> '2-HIGH' then 1 ELSE 0 END) AS low_line_count FROM tpch_optimized.orders, tpch_optimized.lineitem WHERE o_orderkey = l_orderkey AND l_shipmode IN ( 'AIR', 'RAIL' ) AND l_commitdate < l_receiptdate AND l_shipdate < l_commitdate AND l_receiptdate >= date '1994-01-01' AND l_receiptdate < DATE_ADD(DATE "1994-01-01", INTERVAL 1 YEAR) GROUP BY l_shipmode ORDER BY l_shipmode;
SELECT f2 AS c_count, Count(*) AS custdist FROM (SELECT t1.c_custkey AS f1, Count (t2.o_orderkey) AS f2 FROM tpch_optimized.customer AS t1 LEFT OUTER JOIN tpch_optimized.orders AS t2 ON t1.c_custkey = t2.o_custkey WHERE NOT Regexp_contains(o_comment, '.∗express.∗accounts.∗') GROUP BY f1) GROUP BY f2 ORDER BY custdist DESC, f2 DESC;
SELECT 100.00 * SUM( CASE WHEN p_type like 'PROMO%' then l_extENDedprice * (1-l_discount) else 0 END) / SUM(l_extENDedprice * (1-l_discount)) AS promo_revenue FROM tpch_optimized.lineitem, tpch_optimized.part WHERE l_partkey = p_partkey AND l_shipdate >= date '1994-12-01' AND l_shipdate < DATE_ADD(DATE "1994-12-01", INTERVAL 1 MONTH);
SELECT p_brAND, p_type, p_size, Count (DISTINCT ps_suppkey) AS supplier_cnt FROM tpch_optimized.partsupp AS t1 JOIN tpch_optimized.part AS t2 ON t2.p_partkey = t1.ps_partkey WHERE p_brAND <> 'BrAND#41' AND NOT Regexp_contains (p_type, 'PROMO BRUSHED. ∗') AND p_size IN ( 15, 46, 47, 34, 9, 22, 17, 43 ) AND ps_suppkey NOT IN (SELECT s_suppkey FROM tpch_optimized.supplier WHERE Regexp_contains(s_comment, '.∗Customer.∗Complaints.∗')) GROUP BY p_brAND, p_type, p_size ORDER BY supplier_cnt DESC, p_brAND, p_type, p_size;
SELECT SUM(j1.l_extENDedprice) / 7.0 AS avg_yearly FROM ( SELECT t1.l_quantity, t1.l_extENDedprice, t2.p_partkey, t2.p_brAND, t2.p_container FROM tpch_optimized.lineitem AS t1 JOIN tpch_optimized.part AS t2 ON t2.p_partkey = t1.l_partkey ) AS j1 JOIN ( SELECT l_partkey, 0.2 * Avg(l_quantity) AS average FROM tpch_optimized.lineitem GROUP BY l_partkey ) AS j2 ON j1.p_partkey = j2.l_partkey WHERE j1.p_container = 'LG BAG' AND j1.l_quantity < j2.average;
SELECT c_name AS NAME, c_custkey, o_orderkey, o_orderdate, o_totalprice, SUM(l_quantity) AS l_quant FROM ( SELECT c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, l_quantity FROM ( SELECT c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice FROM tpch_optimized.customer AS T1 JOIN tpch_optimized.orders AS T2 ON T1.c_custkey = T2.o_custkey ) AS T3 JOIN tpch_optimized.lineitem AS T4 ON T3.o_orderkey = T4.l_orderkey ) AS T5 WHERE o_orderkey IN ( SELECT l_orderkey FROM ( SELECT l_orderkey, SUM(l_quantity) FROM tpch_optimized.lineitem GROUP BY l_orderkey HAVING SUM(l_quantity) > 200 ) )GROUP BY c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice ORDER BY o_totalprice DESC, o_orderdate;
SELECT SUM(l_extENDedprice* (1-l_discount)) AS revenue FROM tpch_optimized.lineitem, tpch_optimized.part WHERE p_partkey = l_partkey AND p_container IN ( 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG' ) AND l_quantity >= 3 AND l_quantity <= 3 + 10 AND p_size BETWEEN 1 AND 5 AND l_shipmode IN ( 'AIR', 'AIR REG' ) AND l_shipinstruct = 'DELIVER IN PERSON' OR p_partkey = l_partkey AND p_container IN ( 'MED BAG', 'MED BOX', 'MED PKG', 'MED PACK' ) AND l_quantity >= 10 AND l_quantity <= 10 + 10 AND p_size BETWEEN 1 AND 10 AND l_shipmode IN ( 'AIR', 'AIR REG' ) AND l_shipinstruct = 'DELIVER IN PERSON' OR p_partkey = l_partkey AND p_container IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG' ) AND l_quantity >= 22 AND l_quantity <= 22 + 10 AND p_size BETWEEN 1 AND 15 AND l_shipmode IN ( 'AIR', 'AIR REG' ) AND l_shipinstruct = 'DELIVER IN PERSON';
SELECT s_name, s_address FROM tpch_optimized.supplier, tpch_optimized.nation WHERE s_suppkey IN ( SELECT ps_suppkey FROM tpch_optimized.partsupp WHERE ps_partkey IN ( SELECT p_partkey FROM tpch_optimized.part WHERE p_name like '%chocolate%' ) AND ps_availqty > ( SELECT 0.5 * SUM(l_quantity) FROM tpch_optimized.lineitem WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey AND l_shipdate >= date '1991-01-01' AND l_shipdate < DATE_ADD(DATE "1994-01-01", INTERVAL 1 YEAR) )) AND s_nationkey = n_nationkey AND n_name = 'PERU' ORDER BY s_name;
SELECT cntrycode, count(*) AS numcust, SUM(c_acctbal) AS totacctbal FROM( SELECT SUBSTR(c_phone , 1, 2) AS cntrycode, c_acctbal FROM tpch_optimized.customer WHERE SUBSTR(c_phone , 1, 2) IN ( '41','28','39','21','24','29','44' ) AND c_acctbal > ( SELECT avg(c_acctbal) FROM tpch_optimized.customer WHERE c_acctbal > 0.00 AND SUBSTR(c_phone , 1, 2) IN ( '41','28','39','21','24','29','44') ) AND NOT EXISTS ( SELECT * FROM tpch_optimized.orders WHERE o_custkey = c_custkey ) ) AS custsale GROUP BY cntrycode ORDER BY cntrycode;
SELECT cntrycode, count(*) AS numcust, SUM(c_acctbal) AS totacctbal FROM( SELECT SUBSTR(c_phone , 1, 2) AS cntrycode, c_acctbal FROM tpch_optimized.customer WHERE SUBSTR(c_phone , 1, 2) IN ( '41','28','39','21','24','29','44' ) AND c_acctbal > ( SELECT avg(c_acctbal) FROM tpch_optimized.customer WHERE c_acctbal > 0.00 AND SUBSTR(c_phone , 1, 2) IN ( '41','28','39','21','24','29','44') ) AND NOT EXISTS ( SELECT * FROM tpch_optimized.orders WHERE o_custkey = c_custkey ) ) AS custsale GROUP BY cntrycode ORDER BY cntrycode;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment