1、SSB数据生成器
1.1、编译ssb-poc工具包
wget http://dorisdb-public.oss-cn-zhangjiakou.aliyuncs.com/ssb-poc-0.9.zip
unzip ssb-poc-0.9.zip
cd ssb-poc
make && make install
#所有相关工具安装到output目录
1.2、生成数据文件
cd output
# 运行生成数据的脚本
bin/gen-ssb.sh 50 data_dir
1.3、Doris建表
create table if not exists `lineorder` (
`lo_orderkey` int(11) not null comment "",
`lo_linenumber` int(11) not null comment "",
`lo_custkey` int(11) not null comment "",
`lo_partkey` int(11) not null comment "",
`lo_suppkey` int(11) not null comment "",
`lo_orderdate` int(11) not null comment "",
`lo_orderpriority` varchar(16) not null comment "",
`lo_shippriority` int(11) not null comment "",
`lo_quantity` int(11) not null comment "",
`lo_extendedprice` int(11) not null comment "",
`lo_ordtotalprice` int(11) not null comment "",
`lo_discount` int(11) not null comment "",
`lo_revenue` int(11) not null comment "",
`lo_supplycost` int(11) not null comment "",
`lo_tax` int(11) not null comment "",
`lo_commitdate` int(11) not null comment "",
`lo_shipmode` varchar(11) not null comment ""
) engine=olap
duplicate key(`lo_orderkey`)
comment "olap"
distributed by hash(`lo_orderkey`) buckets 96
properties (
"replication_num" = "1",
"colocate_with" = "group1",
"in_memory" = "false",
"storage_format" = "default"
);
create table if not exists `customer` (
`c_custkey` int(11) not null comment "",
`c_name` varchar(26) not null comment "",
`c_address` varchar(41) not null comment "",
`c_city` varchar(11) not null comment "",
`c_nation` varchar(16) not null comment "",
`c_region` varchar(13) not null comment "",
`c_phone` varchar(16) not null comment "",
`c_mktsegment` varchar(11) not null comment ""
) engine=olap
duplicate key(`c_custkey`)
comment "olap"
distributed by hash(`c_custkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa2",
"in_memory" = "false",
"storage_format" = "default"
);
create table if not exists `dates` (
`d_datekey` int(11) not null comment "",
`d_date` varchar(20) not null comment "",
`d_dayofweek` varchar(10) not null comment "",
`d_month` varchar(11) not null comment "",
`d_year` int(11) not null comment "",
`d_yearmonthnum` int(11) not null comment "",
`d_yearmonth` varchar(9) not null comment "",
`d_daynuminweek` int(11) not null comment "",
`d_daynuminmonth` int(11) not null comment "",
`d_daynuminyear` int(11) not null comment "",
`d_monthnuminyear` int(11) not null comment "",
`d_weeknuminyear` int(11) not null comment "",
`d_sellingseason` varchar(14) not null comment "",
`d_lastdayinweekfl` int(11) not null comment "",
`d_lastdayinmonthfl` int(11) not null comment "",
`d_holidayfl` int(11) not null comment "",
`d_weekdayfl` int(11) not null comment ""
) engine=olap
duplicate key(`d_datekey`)
comment "olap"
distributed by hash(`d_datekey`) buckets 1
properties (
"replication_num" = "1",
"in_memory" = "false",
"colocate_with" = "groupa3",
"storage_format" = "default"
);
create table if not exists `supplier` (
`s_suppkey` int(11) not null comment "",
`s_name` varchar(26) not null comment "",
`s_address` varchar(26) not null comment "",
`s_city` varchar(11) not null comment "",
`s_nation` varchar(16) not null comment "",
`s_region` varchar(13) not null comment "",
`s_phone` varchar(16) not null comment ""
) engine=olap
duplicate key(`s_suppkey`)
comment "olap"
distributed by hash(`s_suppkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa4",
"in_memory" = "false",
"storage_format" = "default"
);
create table if not exists `part` (
`p_partkey` int(11) not null comment "",
`p_name` varchar(23) not null comment "",
`p_mfgr` varchar(7) not null comment "",
`p_category` varchar(8) not null comment "",
`p_brand` varchar(10) not null comment "",
`p_color` varchar(12) not null comment "",
`p_type` varchar(26) not null comment "",
`p_size` int(11) not null comment "",
`p_container` varchar(11) not null comment ""
) engine=olap
duplicate key(`p_partkey`)
comment "olap"
distributed by hash(`p_partkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa5",
"in_memory" = "false",
"storage_format" = "default"
);
create table if not exists `lineorder_flat` (
`lo_orderkey` int(11) not null comment "",
`lo_orderdate` date not null comment "",
`lo_linenumber` tinyint(4) not null comment "",
`lo_custkey` int(11) not null comment "",
`lo_partkey` int(11) not null comment "",
`lo_suppkey` int(11) not null comment "",
`lo_orderpriority` varchar(100) not null comment "",
`lo_shippriority` tinyint(4) not null comment "",
`lo_quantity` tinyint(4) not null comment "",
`lo_extendedprice` int(11) not null comment "",
`lo_ordtotalprice` int(11) not null comment "",
`lo_discount` tinyint(4) not null comment "",
`lo_revenue` int(11) not null comment "",
`lo_supplycost` int(11) not null comment "",
`lo_tax` tinyint(4) not null comment "",
`lo_commitdate` date not null comment "",
`lo_shipmode` varchar(100) not null comment "",
`c_name` varchar(100) not null comment "",
`c_address` varchar(100) not null comment "",
`c_city` varchar(100) not null comment "",
`c_nation` varchar(100) not null comment "",
`c_region` varchar(100) not null comment "",
`c_phone` varchar(100) not null comment "",
`c_mktsegment` varchar(100) not null comment "",
`s_name` varchar(100) not null comment "",
`s_address` varchar(100) not null comment "",
`s_city` varchar(100) not null comment "",
`s_nation` varchar(100) not null comment "",
`s_region` varchar(100) not null comment "",
`s_phone` varchar(100) not null comment "",
`p_name` varchar(100) not null comment "",
`p_mfgr` varchar(100) not null comment "",
`p_category` varchar(100) not null comment "",
`p_brand` varchar(100) not null comment "",
`p_color` varchar(100) not null comment "",
`p_type` varchar(100) not null comment "",
`p_size` tinyint(4) not null comment "",
`p_container` varchar(100) not null comment ""
) engine=olap
duplicate key(`lo_orderkey`)
comment "olap"
distributed by hash(`lo_orderkey`) buckets 192
properties (
"replication_num" = "1",
"colocate_with" = "groupxx1",
"in_memory" = "false",
"storage_format" = "default"
);
1.4、导入数据
通过stream load
curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/customer.tbl http://192.166.111.111:8030/api/changxin/customer/_stream_load
curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/dates.tbl http://192.166.111.111:8030/api/changxin/dates/_stream_load
curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/part.tbl http://192.166.111.111:8030/api/changxin/part/_stream_load
curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/supplier.tbl http://192.166.111.111:8030/api/changxin/supplier/_stream_load
curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/lineorder.tbl.1 http://192.166.111.111:8030/api/changxin/lineorder/_stream_load
1.5、生成lineorder_flat单表数据
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1992, 1993);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1994, 1995);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1996, 1997);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1998);
2、执行标准SQL-单表查询
--Q1.1
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE LO_ORDERDATE >= 19930101 and LO_ORDERDATE <= 19931231 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
--Q1.2
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat
WHERE LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19940131 AND LO_DISCOUNT
BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;
--Q1.3
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE weekofyear(LO_ORDERDATE) = 6 AND LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19941231
AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;
--Q2.1
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q2.2
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q2.3
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q3.1
SELECT
C_NATION,
S_NATION,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_NATION,
S_NATION,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.2
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.3
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND S_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.4
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ('UNITED KI1', 'UNITED KI5') AND S_CITY in ( 'UNITED KI1', 'UNITED KI5') AND LO_ORDERDATE >= 19971201 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q4.1
SELECT
(LO_ORDERDATE DIV 10000) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
--Q4.2
SELECT
(LO_ORDERDATE DIV 10000) AS year,
S_NATION,
P_CATEGORY,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
year,
S_NATION,
P_CATEGORY
ORDER BY
year ASC,
S_NATION ASC,
P_CATEGORY ASC;
--Q4.3
SELECT
(LO_ORDERDATE DIV 10000) AS year,
S_CITY,
P_BRAND,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE S_NATION = 'UNITED STATES' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND P_CATEGORY = 'MFGR#14'
GROUP BY
year,
S_CITY,
P_BRAND
ORDER BY
year ASC,
S_CITY ASC,
P_BRAND ASC;
2.1、单表查询结果
3、执行标准SQL-多表查询
--Q1.1
select sum(lo_revenue) as revenue
from lineorder join dates on lo_orderdate = d_datekey
where d_year = 1993 and lo_discount between 1 and 3 and lo_quantity < 25;
--Q1.2
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_yearmonthnum = 199401
and lo_discount between 4 and 6
and lo_quantity between 26 and 35;
--Q1.3
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_weeknuminyear = 6 and d_year = 1994
and lo_discount between 5 and 7
and lo_quantity between 26 and 35;
--Q2.1
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_category = 'MFGR#12' and s_region = 'AMERICA'
group by d_year, p_brand
order by d_year, p_brand;
--Q2.2
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand between 'MFGR#2221' and 'MFGR#2228' and s_region = 'ASIA'
group by d_year, p_brand
order by d_year, p_brand;
--Q2.3
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand = 'MFGR#2239' and s_region = 'EUROPE'
group by d_year, p_brand
order by d_year, p_brand;
--Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_region = 'ASIA' and s_region = 'ASIA'and d_year >= 1992 and d_year <= 1997
group by c_nation, s_nation, d_year
order by d_year asc, lo_revenue desc;
--Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_nation = 'UNITED STATES' and s_nation = 'UNITED STATES'
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q3.4
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997'
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q4.1
select d_year, c_nation, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA' and s_region = 'AMERICA' and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, c_nation
order by d_year, c_nation;
--Q4.2
select d_year, s_nation, p_category, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_region = 'AMERICA'
and (d_year = 1997 or d_year = 1998)
and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, s_nation, p_category
order by d_year, s_nation, p_category;
--Q4.3
select d_year, s_city, p_brand, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_nation = 'UNITED STATES'
and (d_year = 1997 or d_year = 1998)
and p_category = 'MFGR#14'
group by d_year, s_city, p_brand
order by d_year, s_city, p_brand;