SSB基准测试-Apache Doris

1、SSB数据生成器

1.1、编译ssb-poc工具包

wget http://dorisdb-public.oss-cn-zhangjiakou.aliyuncs.com/ssb-poc-0.9.zip
unzip ssb-poc-0.9.zip
cd ssb-poc
make && make install
#所有相关工具安装到output目录

1.2、生成数据文件

cd output 
# 运行生成数据的脚本
bin/gen-ssb.sh 50 data_dir

1.3、Doris建表

create table if not exists `lineorder` (
  `lo_orderkey` int(11) not null comment "",
  `lo_linenumber` int(11) not null comment "",
  `lo_custkey` int(11) not null comment "",
  `lo_partkey` int(11) not null comment "",
  `lo_suppkey` int(11) not null comment "",
  `lo_orderdate` int(11) not null comment "",
  `lo_orderpriority` varchar(16) not null comment "",
  `lo_shippriority` int(11) not null comment "",
  `lo_quantity` int(11) not null comment "",
  `lo_extendedprice` int(11) not null comment "",
  `lo_ordtotalprice` int(11) not null comment "",
  `lo_discount` int(11) not null comment "",
  `lo_revenue` int(11) not null comment "",
  `lo_supplycost` int(11) not null comment "",
  `lo_tax` int(11) not null comment "",
  `lo_commitdate` int(11) not null comment "",
  `lo_shipmode` varchar(11) not null comment ""
) engine=olap
duplicate key(`lo_orderkey`)
comment "olap"
distributed by hash(`lo_orderkey`) buckets 96
properties (
"replication_num" = "1",
"colocate_with" = "group1",
"in_memory" = "false",
"storage_format" = "default"
);
  
  
create table if not exists `customer` (
  `c_custkey` int(11) not null comment "",
  `c_name` varchar(26) not null comment "",
  `c_address` varchar(41) not null comment "",
  `c_city` varchar(11) not null comment "",
  `c_nation` varchar(16) not null comment "",
  `c_region` varchar(13) not null comment "",
  `c_phone` varchar(16) not null comment "",
  `c_mktsegment` varchar(11) not null comment ""
) engine=olap
duplicate key(`c_custkey`)
comment "olap"
distributed by hash(`c_custkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa2",
"in_memory" = "false",
"storage_format" = "default"
);
  
  
create table if not exists `dates` (
  `d_datekey` int(11) not null comment "",
  `d_date` varchar(20) not null comment "",
  `d_dayofweek` varchar(10) not null comment "",
  `d_month` varchar(11) not null comment "",
  `d_year` int(11) not null comment "",
  `d_yearmonthnum` int(11) not null comment "",
  `d_yearmonth` varchar(9) not null comment "",
  `d_daynuminweek` int(11) not null comment "",
  `d_daynuminmonth` int(11) not null comment "",
  `d_daynuminyear` int(11) not null comment "",
  `d_monthnuminyear` int(11) not null comment "",
  `d_weeknuminyear` int(11) not null comment "",
  `d_sellingseason` varchar(14) not null comment "",
  `d_lastdayinweekfl` int(11) not null comment "",
  `d_lastdayinmonthfl` int(11) not null comment "",
  `d_holidayfl` int(11) not null comment "",
  `d_weekdayfl` int(11) not null comment ""
) engine=olap
duplicate key(`d_datekey`)
comment "olap"
distributed by hash(`d_datekey`) buckets 1
properties (
"replication_num" = "1",
"in_memory" = "false",
"colocate_with" = "groupa3",
"storage_format" = "default"
);
  
create table if not exists `supplier` (
  `s_suppkey` int(11) not null comment "",
  `s_name` varchar(26) not null comment "",
  `s_address` varchar(26) not null comment "",
  `s_city` varchar(11) not null comment "",
  `s_nation` varchar(16) not null comment "",
  `s_region` varchar(13) not null comment "",
  `s_phone` varchar(16) not null comment ""
) engine=olap
duplicate key(`s_suppkey`)
comment "olap"
distributed by hash(`s_suppkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa4",
"in_memory" = "false",
"storage_format" = "default"
);
  
create table if not exists `part` (
  `p_partkey` int(11) not null comment "",
  `p_name` varchar(23) not null comment "",
  `p_mfgr` varchar(7) not null comment "",
  `p_category` varchar(8) not null comment "",
  `p_brand` varchar(10) not null comment "",
  `p_color` varchar(12) not null comment "",
  `p_type` varchar(26) not null comment "",
  `p_size` int(11) not null comment "",
  `p_container` varchar(11) not null comment ""
) engine=olap
duplicate key(`p_partkey`)
comment "olap"
distributed by hash(`p_partkey`) buckets 12
properties (
"replication_num" = "1",
"colocate_with" = "groupa5",
"in_memory" = "false",
"storage_format" = "default"
);
  
create table if not exists `lineorder_flat` (
  `lo_orderkey` int(11) not null comment "",
  `lo_orderdate` date not null comment "",
  `lo_linenumber` tinyint(4) not null comment "",
  `lo_custkey` int(11) not null comment "",
  `lo_partkey` int(11) not null comment "",
  `lo_suppkey` int(11) not null comment "",
  `lo_orderpriority` varchar(100) not null comment "",
  `lo_shippriority` tinyint(4) not null comment "",
  `lo_quantity` tinyint(4) not null comment "",
  `lo_extendedprice` int(11) not null comment "",
  `lo_ordtotalprice` int(11) not null comment "",
  `lo_discount` tinyint(4) not null comment "",
  `lo_revenue` int(11) not null comment "",
  `lo_supplycost` int(11) not null comment "",
  `lo_tax` tinyint(4) not null comment "",
  `lo_commitdate` date not null comment "",
  `lo_shipmode` varchar(100) not null comment "",
  `c_name` varchar(100) not null comment "",
  `c_address` varchar(100) not null comment "",
  `c_city` varchar(100) not null comment "",
  `c_nation` varchar(100) not null comment "",
  `c_region` varchar(100) not null comment "",
  `c_phone` varchar(100) not null comment "",
  `c_mktsegment` varchar(100) not null comment "",
  `s_name` varchar(100) not null comment "",
  `s_address` varchar(100) not null comment "",
  `s_city` varchar(100) not null comment "",
  `s_nation` varchar(100) not null comment "",
  `s_region` varchar(100) not null comment "",
  `s_phone` varchar(100) not null comment "",
  `p_name` varchar(100) not null comment "",
  `p_mfgr` varchar(100) not null comment "",
  `p_category` varchar(100) not null comment "",
  `p_brand` varchar(100) not null comment "",
  `p_color` varchar(100) not null comment "",
  `p_type` varchar(100) not null comment "",
  `p_size` tinyint(4) not null comment "",
  `p_container` varchar(100) not null comment ""
) engine=olap
duplicate key(`lo_orderkey`)
comment "olap"
distributed by hash(`lo_orderkey`) buckets 192
properties (
"replication_num" = "1",
"colocate_with" = "groupxx1",
"in_memory" = "false",
"storage_format" = "default"
);

1.4、导入数据

通过stream load

curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/customer.tbl http://192.166.111.111:8030/api/changxin/customer/_stream_load

curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/dates.tbl http://192.166.111.111:8030/api/changxin/dates/_stream_load

curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/part.tbl http://192.166.111.111:8030/api/changxin/part/_stream_load

curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/supplier.tbl http://192.166.111.111:8030/api/changxin/supplier/_stream_load

curl --location-trusted -u root:Abcd*520 -H "column_separator:|" -T /data/ssb-poc/output/data_dir/lineorder.tbl.1 http://192.166.111.111:8030/api/changxin/lineorder/_stream_load

1.5、生成lineorder_flat单表数据

insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey)  inner join supplier s on (s.s_suppkey = l.lo_suppkey)  inner join part p on  (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1992, 1993);

insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey)  inner join supplier s on (s.s_suppkey = l.lo_suppkey)  inner join part p on  (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1994, 1995);

insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey)  inner join supplier s on (s.s_suppkey = l.lo_suppkey)  inner join part p on  (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1996, 1997);

insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey)  inner join supplier s on (s.s_suppkey = l.lo_suppkey)  inner join part p on  (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1998);

2、执行标准SQL-单表查询

--Q1.1
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE LO_ORDERDATE >= 19930101 and LO_ORDERDATE <= 19931231 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;


--Q1.2
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat 
WHERE LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19940131 AND LO_DISCOUNT 
BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;

--Q1.3
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE weekofyear(LO_ORDERDATE) = 6 AND LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19941231
  AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;


--Q2.1
SELECT
    sum(LO_REVENUE),
    (LO_ORDERDATE DIV 10000) AS year,
    P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
    year,
    P_BRAND
ORDER BY
    year,
    P_BRAND;


--Q2.2
SELECT
    sum(LO_REVENUE),
    (LO_ORDERDATE DIV 10000) AS year,
    P_BRAND
FROM lineorder_flat
WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY
    year,
    P_BRAND
ORDER BY
    year,
    P_BRAND;


--Q2.3
SELECT
    sum(LO_REVENUE),
    (LO_ORDERDATE DIV 10000) AS year,
    P_BRAND
FROM lineorder_flat
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY
    year,
    P_BRAND
ORDER BY
    year,
    P_BRAND;


--Q3.1
SELECT
    C_NATION,
    S_NATION,
    (LO_ORDERDATE DIV 10000) AS year,
    sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND LO_ORDERDATE  >= 19920101 AND LO_ORDERDATE   <= 19971231
GROUP BY
    C_NATION,
    S_NATION,
    year
ORDER BY
    year ASC,
    revenue DESC;


--Q3.2
SELECT
    C_CITY,
    S_CITY,
    (LO_ORDERDATE DIV 10000) AS year,
    sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND LO_ORDERDATE  >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
    C_CITY,
    S_CITY,
    year
ORDER BY
    year ASC,
    revenue DESC;


--Q3.3
SELECT
    C_CITY,
    S_CITY,
    (LO_ORDERDATE DIV 10000) AS year,
    sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND S_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND LO_ORDERDATE  >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
    C_CITY,
    S_CITY,
    year
ORDER BY
    year ASC,
    revenue DESC;


--Q3.4
SELECT
    C_CITY,
    S_CITY,
    (LO_ORDERDATE DIV 10000) AS year,
    sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ('UNITED KI1', 'UNITED KI5') AND S_CITY in ( 'UNITED KI1',  'UNITED KI5') AND  LO_ORDERDATE  >= 19971201 AND LO_ORDERDATE <= 19971231
GROUP BY
    C_CITY,
    S_CITY,
    year
ORDER BY
    year ASC,
    revenue DESC;


--Q4.1
SELECT
   (LO_ORDERDATE DIV 10000) AS year,
    C_NATION,
    sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
    year,
    C_NATION
ORDER BY
    year ASC,
    C_NATION ASC;


--Q4.2
SELECT
   (LO_ORDERDATE DIV 10000) AS year,
    S_NATION,
    P_CATEGORY,
    sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND  P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
    year,
    S_NATION,
    P_CATEGORY
ORDER BY
    year ASC,
    S_NATION ASC,
    P_CATEGORY ASC;


--Q4.3
SELECT
    (LO_ORDERDATE DIV 10000) AS year,
    S_CITY,
    P_BRAND,
    sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE S_NATION = 'UNITED STATES' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND P_CATEGORY = 'MFGR#14'
GROUP BY
    year,
    S_CITY,
    P_BRAND
ORDER BY
    year ASC,
    S_CITY ASC,
    P_BRAND ASC;


2.1、单表查询结果

在这里插入图片描述

3、执行标准SQL-多表查询

--Q1.1
select sum(lo_revenue) as revenue
from lineorder join dates on lo_orderdate = d_datekey
where d_year = 1993 and lo_discount between 1 and 3 and lo_quantity < 25;

--Q1.2
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_yearmonthnum = 199401
and lo_discount between 4 and 6
and lo_quantity between 26 and 35;


--Q1.3
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_weeknuminyear = 6 and d_year = 1994
and lo_discount between 5 and 7
and lo_quantity between 26 and 35;


--Q2.1
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_category = 'MFGR#12' and s_region = 'AMERICA'
group by d_year, p_brand
order by d_year, p_brand;


--Q2.2
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand between 'MFGR#2221' and 'MFGR#2228' and s_region = 'ASIA'
group by d_year, p_brand
order by d_year, p_brand;


--Q2.3
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand = 'MFGR#2239' and s_region = 'EUROPE'
group by d_year, p_brand
order by d_year, p_brand;


--Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_region = 'ASIA' and s_region = 'ASIA'and d_year >= 1992 and d_year <= 1997
group by c_nation, s_nation, d_year
order by d_year asc, lo_revenue desc;


--Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_nation = 'UNITED STATES' and s_nation = 'UNITED STATES'
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;


--Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;


--Q3.4
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997'
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;


--Q4.1
select d_year, c_nation, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA' and s_region = 'AMERICA' and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, c_nation
order by d_year, c_nation;


--Q4.2
select d_year, s_nation, p_category, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_region = 'AMERICA'
and (d_year = 1997 or d_year = 1998)
and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, s_nation, p_category
order by d_year, s_nation, p_category;


--Q4.3
select d_year, s_city, p_brand, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_nation = 'UNITED STATES'
and (d_year = 1997 or d_year = 1998)
and p_category = 'MFGR#14'
group by d_year, s_city, p_brand
order by d_year, s_city, p_brand;


3.1、多表查询结果

在这里插入图片描述

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值