1、SSB数据生成器
生成数据文件
git clone https://github.com/electrum/ssb-dbgen.git
cd ssb-dbgen
sed -i 's/^MACHINE.*=.*/MACHINE=LINUX/' makefile # 将平台改为LINUX
make # 生成可执行程序dbgen
# 利用dbgen生成示例数据,-T指定表,-s指定数据放大系数
./dbgen -s 50 -T c # CUSTOMER表
1500000 customer.tbl
./dbgen -s 20 -T p # PART表
1000000 part.tbl
./dbgen -s 50 -T s # SUPPLIER表
100000 supplier.tbl
./dbgen -s 1 -T d # DATE_DIM表
2556 date.tbl
./dbgen -s 50 -T l # LINEORDER表
300005811 lineorder.tbl
2、ClickHouse操作
2.1、创建数据库
CREATE DATABASE IF NOT EXISTS ssb;
2.2、 创建数据表
use ssb;
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
CREATE TABLE dates
(
D_DATEKEY UInt32,
D_DATE String,
D_DAYOFWEEK LowCardinality(String),
D_MONTH LowCardinality(String),
D_YEAR UInt32,
D_YEARMONTHNUM UInt32,
D_YEARMONTH String,
D_DAYNUMINWEEK UInt8,
D_DAYNUMINMONTH UInt8,
D_DAYNUMINYEAR UInt8,
D_MONTHNUMINYEAR UInt8,
D_WEEKNUMINYEAR UInt8,
D_SELLINGSEASON LowCardinality(String),
D_LASTDAYINWEEKFL LowCardinality(String),
D_LASTDAYINMONTHFL LowCardinality(String),
D_HOLIDAYFL LowCardinality(String),
D_WEEKDAYFL LowCardinality(String)
)
ENGINE = MergeTree ORDER BY D_DATEKEY;
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
AS SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
2.3、 导入数据
clickhouse-client -u default -h 192.168.200.215 --port 9090 --password ClickHouse*8 --query "INSERT INTO ssb.dates FORMAT CSV" < /data/ssb-dbgen/date.tbl
clickhouse-client -u default -h 192.168.200.215 --port 9090 --password ClickHouse*8 --query "INSERT INTO ssb.customer FORMAT CSV" < /data/ssb-dbgen/customer.tbl
clickhouse-client -u default -h 192.168.200.215 --port 9090 --password ClickHouse*8 --query "INSERT INTO ssb.part FORMAT CSV" < /data/ssb-dbgen/part.tbl
clickhouse-client -u default -h 192.168.200.215 --port 9090 --password ClickHouse*8 --query "INSERT INTO ssb.supplier FORMAT CSV" < /data/ssb-dbgen/supplier.tbl
clickhouse-client -u default -h 192.168.200.215 --port 9090 --password ClickHouse*8 --query "INSERT INTO ssb.lineorder FORMAT CSV" < /data/ssb-dbgen/lineorder.tbl
3、执行标准SQL-单表查询
3.1、执行SQL
--Q1.1
SELECT SUM(LO_REVENUE) AS REVENUE
FROM lineorder join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
WHERE D_YEAR = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
--Q1.2
SELECT SUM(LO_REVENUE) AS REVENUE
FROM lineorder
join dates ON toDate32(LO_ORDERDATE) = toDate32(D_DATEKEY)
WHERE D_YEARMONTHNUM = 199401
AND LO_DISCOUNT BETWEEN 4 AND 6
AND LO_QUANTITY BETWEEN 26 AND 35;
--Q1.3
SELECT SUM(LO_REVENUE) AS REVENUE
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
WHERE D_WEEKNUMINYEAR = 6 AND D_YEAR = 1994
AND LO_DISCOUNT BETWEEN 5 AND 7
AND LO_QUANTITY BETWEEN 26 AND 35;
--Q2.1
SELECT SUM(LO_REVENUE) AS LO_REVENUE, D_YEAR, P_BRAND
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
JOIN part ON LO_PARTKEY = P_PARTKEY
JOIN supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q2.2
SELECT SUM(LO_REVENUE) AS LO_REVENUE, D_YEAR, P_BRAND
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join part on LO_PARTKEY = P_PARTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q2.3
SELECT SUM(LO_REVENUE) AS LO_REVENUE, D_YEAR, P_BRAND
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join part on LO_PARTKEY = P_PARTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q3.1
SELECT C_NATION, S_NATION, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA'AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_NATION, S_NATION, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.2
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES'
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.3
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
FROM lineorder
join dates on toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE (C_CITY='UNITED KI1' OR C_CITY='UNITED KI5')
AND (S_CITY='UNITED KI1' OR S_CITY='UNITED KI5')
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.4
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE (C_CITY='UNITED KI1' OR C_CITY='UNITED KI5') AND (S_CITY='UNITED KI1' OR S_CITY='UNITED KI5') AND D_YEARMONTH = 'DEC1997'
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q4.1
SELECT D_YEAR, C_NATION, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, C_NATION
ORDER BY D_YEAR, C_NATION;
--Q4.2
SELECT D_YEAR, S_NATION, P_CATEGORY, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA'AND S_REGION = 'AMERICA'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, S_NATION, P_CATEGORY
ORDER BY D_YEAR, S_NATION, P_CATEGORY;
--Q4.3
SELECT D_YEAR, S_CITY, P_BRAND, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
FROM lineorder
join dates ON toDate(LO_ORDERDATE) = toDate(D_DATEKEY)
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA'AND S_NATION = 'UNITED STATES'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND P_CATEGORY = 'MFGR#14'
GROUP BY D_YEAR, S_CITY, P_BRAND
ORDER BY D_YEAR, S_CITY, P_BRAND;
3.2、单表查询结果
4、执行标准SQL-多表查询
4.1、执行SQL
--Q1.1
select sum(LO_REVENUE) as revenue
from lineorder join dates ON toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
WHERE D_YEAR = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
--Q1.2
select sum(LO_REVENUE) as revenue
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
WHERE D_YEARMONTHNUM = 199401
AND LO_DISCOUNT BETWEEN 4 AND 6
AND LO_QUANTITY BETWEEN 26 AND 35;
--Q1.3
select sum(LO_REVENUE) as revenue
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
WHERE D_WEEKNUMINYEAR = 6 AND D_YEAR = 1994
AND LO_DISCOUNT BETWEEN 5 AND 7
AND LO_QUANTITY BETWEEN 26 AND 35;
--Q2.1
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join part on LO_PARTKEY = P_PARTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q2.2
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join part on LO_PARTKEY = P_PARTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q2.3
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join part on LO_PARTKEY = P_PARTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;
--Q3.1
select C_NATION, S_NATION, D_YEAR, sum(LO_REVENUE) as LO_REVENUE
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer on LO_CUSTKEY = C_CUSTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA'AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_NATION, S_NATION, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.2
select C_CITY, S_CITY, D_YEAR, sum(LO_REVENUE) AS LO_REVENUE
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer on LO_CUSTKEY = C_CUSTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES'
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.3
select C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE (C_CITY='UNITED KI1' OR C_CITY='UNITED KI5')
AND (S_CITY='UNITED KI1' OR S_CITY='UNITED KI5')
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q3.4
select C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS LO_REVENUE
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
WHERE (C_CITY='UNITED KI1' OR C_CITY='UNITED KI5') AND (S_CITY='UNITED KI1' OR S_CITY='UNITED KI5') AND D_YEARMONTH = 'DEC1997'
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, LO_REVENUE DESC;
--Q4.1
select D_YEAR, C_NATION, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer ON LO_CUSTKEY = C_CUSTKEY
join supplier ON LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, C_NATION
ORDER BY D_YEAR, C_NATION;
--Q4.2
select D_YEAR, S_NATION, P_CATEGORY, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer on LO_CUSTKEY = C_CUSTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA'AND S_REGION = 'AMERICA'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, S_NATION, P_CATEGORY
ORDER BY D_YEAR, S_NATION, P_CATEGORY;
--Q4.3
select D_YEAR, S_CITY, P_BRAND, SUM(LO_REVENUE) - SUM(LO_SUPPLYCOST) AS PROFIT
from lineorder
join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
join customer on LO_CUSTKEY = C_CUSTKEY
join supplier on LO_SUPPKEY = S_SUPPKEY
join part on LO_PARTKEY = P_PARTKEY
WHERE C_REGION = 'AMERICA'AND S_NATION = 'UNITED STATES'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND P_CATEGORY = 'MFGR#14'
GROUP BY D_YEAR, S_CITY, P_BRAND
ORDER BY D_YEAR, S_CITY, P_BRAND;