1、SSB
1.1、概述
SSB(Star Schema Benchmark)是麻省州立大学波士顿校区的研究人员定义的基于现实商业应用的数据模型,业界公认用来模拟决策支持类应用,比较公正和中立。
学术界和工业界普遍采用它来评价决策支持技术方面应用的性能。
全方位评测系统的整体商业计算综合能力,对厂商的要求更高。
在银行信贷分析和信用卡分析、电信运营分析、税收分析、烟草行业决策分析中都有广泛的应用。
1.2、SSB基准测试包括
1个事实表:lineorder;
4个维度表:customer,part,dwdate,supplier;
13条标准SQL查询测试语句:统计查询、多表关联、sum、复杂条件、group by、order by等组合方式。
1.3、SSB数据生成器
生成数据文件
git clone https://github.com/electrum/ssb-dbgen.git
cd ssb-dbgen
sed -i 's/^MACHINE.*=.*/MACHINE=LINUX/' makefile # 将平台改为LINUX
make # 生成可执行程序dbgen
# 利用dbgen生成示例数据,-T指定表,-s指定数据放大系数
./dbgen -s 50 -T c # CUSTOMER表
1500000 customer.tbl
./dbgen -s 20 -T p # PART表
1000000 part.tbl
./dbgen -s 50 -T s # SUPPLIER表
100000 supplier.tbl
./dbgen -s 1 -T d # DATE_DIM表
2556 date.tbl
./dbgen -s 2 -T l # LINEORDER表
11997996 lineorder.tbl
1.4、MySQL配置
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html
[mysqld]
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
datadir=/data/mysql/data
socket=/var/lib/mysql/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
log-error=/data/mysql/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
#忽略大小写配置
lower_case_table_names=1
# 允许最大连接数
max_connections=200
# 允许连接失败的次数。
max_connect_errors=10
# 服务端使用的字符集默认为utf8mb4
character-set-server=utf8mb4
# 创建新表时将使用的默认存储引擎
default-storage-engine=INNODB
# 默认使用“mysql_native_password”插件认证
#mysql_native_password
default_authentication_plugin=mysql_native_password
[mysql]
# 设置mysql客户端默认字符集
default-character-set=utf8mb4
[client]
# 设置mysql客户端连接服务端时默认使用的端口
port=3306
default-character-set=utf8mb4
# ---------------- Session Buffer Cache ----------------
max_allowed_packet = 64M
2、MySQL操作
2.1、创建数据库
create database ssb;
2.2、 创建数据表
create table customer (
c_custkey integer,
c_name varchar(25) not null,
c_address varchar(40) not null,
c_city varchar(10) not null,
c_nation varchar(15) not null,
c_region varchar(12) not null,
c_phone varchar(15) not null,
c_mktsegment varchar(10) not null);
create table dates (
d_datekey integer,
d_date varchar(18) not null,
d_dayofweek varchar(18) not null,
d_month varchar(9) not null,
d_year integer not null,
d_yearmonthnum integer,
d_yearmonth varchar(7) not null,
d_daynuminweek integer,
d_daynuminmonth integer,
d_daynuminyear integer,
d_monthnuminyear integer,
d_weeknuminyear integer,
d_sellingseason varchar(12) not null,
d_lastdayinweekfl integer,
d_lastdayinmonthfl integer,
d_holidayfl integer,
d_weekdayfl integer);
create table part (
p_partkey integer,
p_name varchar(22) not null,
p_mfgr varchar(6) not null,
p_category varchar(7) not null,
p_brand varchar(9) not null,
p_color varchar(11) not null,
p_type varchar(25) not null,
p_size integer not null,
p_container varchar(10) not null);
create table supplier (
s_suppkey integer,
s_name varchar(25) not null,
s_address varchar(25) not null,
s_city varchar(10) not null,
s_nation varchar(15) not null,
s_region varchar(12) not null,
s_phone varchar(15) not null);
create table lineorder (
lo_orderkey bigint,
o_linenumber bigint,
o_custkey integer not null,
o_partkey integer not null,
o_suppkey integer not null,
o_orderdate integer not null,
o_orderpriotity varchar(15) not null,
o_shippriotity integer,
o_quantity bigint,
o_extendedprice bigint,
o_ordtotalprice bigint,
o_discount bigint,
lo_revenue bigint,
lo_supplycost bigint,
lo_tax bigint,
lo_commitdate integer not null,
lo_shipmode varchar(10) not null);
create table if not exists `lineorder_flat` (
`lo_orderkey` int(11) not null comment "",
`lo_orderdate` date not null comment "",
`lo_linenumber` tinyint(4) not null comment "",
`lo_custkey` int(11) not null comment "",
`lo_partkey` int(11) not null comment "",
`lo_suppkey` int(11) not null comment "",
`lo_orderpriotity` varchar(100) not null comment "",
`lo_shippriotity` tinyint(4) not null comment "",
`lo_quantity` tinyint(4) not null comment "",
`lo_extendedprice` int(11) not null comment "",
`lo_ordtotalprice` int(11) not null comment "",
`lo_discount` tinyint(4) not null comment "",
`lo_revenue` int(11) not null comment "",
`lo_supplycost` int(11) not null comment "",
`lo_tax` tinyint(4) not null comment "",
`lo_commitdate` date not null comment "",
`lo_shipmode` varchar(100) not null comment "",
`c_name` varchar(100) not null comment "",
`c_address` varchar(100) not null comment "",
`c_city` varchar(100) not null comment "",
`c_nation` varchar(100) not null comment "",
`c_region` varchar(100) not null comment "",
`c_phone` varchar(100) not null comment "",
`c_mktsegment` varchar(100) not null comment "",
`s_name` varchar(100) not null comment "",
`s_address` varchar(100) not null comment "",
`s_city` varchar(100) not null comment "",
`s_nation` varchar(100) not null comment "",
`s_region` varchar(100) not null comment "",
`s_phone` varchar(100) not null comment "",
`p_name` varchar(100) not null comment "",
`p_mfgr` varchar(100) not null comment "",
`p_category` varchar(100) not null comment "",
`p_brand` varchar(100) not null comment "",
`p_color` varchar(100) not null comment "",
`p_type` varchar(100) not null comment "",
`p_size` tinyint(4) not null comment "",
`p_container` varchar(100) not null comment ""
);
2.3、 导入数据
load data infile '/var/lib/mysql-files/lineorder.tbl' into table lineorder fields terminated by '|' lines terminated by '|\n';
commit;
load data infile '/var/lib/mysql-files/supplier.tbl' into table supplier fields terminated by '|' lines terminated by '|\n';
commit;
load data infile '/var/lib/mysql-files/customer.tbl' into table customer fields terminated by '|' lines terminated by '|\n';
commit;
load data infile '/var/lib/mysql-files/part.tbl' into table part fields terminated by '|' lines terminated by '|\n';
commit;
load data infile '/var/lib/mysql-files/date.tbl' into table dates fields terminated by '|' lines terminated by '|\n';
commit;
2.4、lineorder_flat insert
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1992, 1993);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1994, 1995);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1996, 1997);
insert into lineorder_flat select `lo_orderkey` , `lo_orderdate` , `lo_linenumber` , `lo_custkey` , `lo_partkey` , `lo_suppkey` , `lo_orderpriotity` , `lo_shippriotity` , `lo_quantity` , `lo_extendedprice` , `lo_ordtotalprice` , `lo_discount` , `lo_revenue` , `lo_supplycost` , `lo_tax` , `lo_commitdate` , `lo_shipmode` , `c_name` , `c_address` , `c_city` , `c_nation` , `c_region` , `c_phone` , `c_mktsegment` , `s_name` , `s_address` , `s_city` , `s_nation` , `s_region` , `s_phone` , `p_name` , `p_mfgr` , `p_category` , `p_brand` , `p_color` , `p_type` , `p_size` , `p_container` from lineorder l inner join customer c on (c.c_custkey = l.lo_custkey) inner join supplier s on (s.s_suppkey = l.lo_suppkey) inner join part p on (p.p_partkey = l.lo_partkey) where year(lo_orderdate) in (1998);
2.5、 创建索引
alter table dates add primary key (d_datekey);
alter table dates add index d_year(d_year);
alter table part add primary key (p_partkey);
alter table part add index p_brand(p_brand);
alter table supplier add primary key (s_suppkey);
alter table supplier add index s_city(s_city);
alter table supplier add index s_nation(s_nation);
alter table supplier add index s_region(s_region);
alter table customer add primary key (c_custkey);
alter table customer add index c_nation(c_nation);
alter table customer add index c_region(c_region);
alter table lineorder add index lo_orderkey(lo_orderkey);
alter table lineorder add index lo_linenumber(lo_linenumber);
alter table lineorder add index lo_custkey(lo_custkey);
alter table lineorder add index lo_partkey(lo_partkey);
alter table lineorder add index lo_suppkey(lo_suppkey);
alter table lineorder add index lo_orderdate(lo_orderdate);
alter table lineorder add index lo_revenue(lo_revenue);
alter table lineorder add index lo_supplycost(lo_supplycost);
alter table lineorder_flat add index lo_orderkey(lo_orderkey);
alter table lineorder_flat add index lo_orderdate(lo_orderdate);
alter table lineorder_flat add index lo_custkey(lo_custkey);
alter table lineorder_flat add index lo_partkey(lo_partkey);
alter table lineorder_flat add index lo_suppkey(lo_suppkey);
alter table lineorder_flat add index lo_quantity(lo_quantity);
alter table lineorder_flat add index p_brand(p_brand);
alter table lineorder_flat add index c_nation(c_nation);
alter table lineorder_flat add index c_city(c_city);
alter table lineorder_flat add index s_city(s_city);
alter table lineorder_flat add index c_region(c_region);
alter table lineorder_flat add index s_region(s_region);
alter table lineorder_flat add index p_mfgr(p_mfgr);
3、 执行标准SQL-多表查询
--Q1.1
select sum(lo_revenue) as revenue
from lineorder join dates on lo_orderdate = d_datekey
where d_year = 1993 and lo_discount between 1 and 3 and lo_quantity < 25;
--Q1.2
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_yearmonthnum = 199401
and lo_discount between 4 and 6
and lo_quantity between 26 and 35;
--Q1.3
select sum(lo_revenue) as revenue
from lineorder
join dates on lo_orderdate = d_datekey
where d_weeknuminyear = 6 and d_year = 1994
and lo_discount between 5 and 7
and lo_quantity between 26 and 35;
--Q2.1
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_category = 'MFGR#12' and s_region = 'AMERICA'
group by d_year, p_brand
order by d_year, p_brand;
--Q2.2
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand between 'MFGR#2221' and 'MFGR#2228' and s_region = 'ASIA'
group by d_year, p_brand
order by d_year, p_brand;
--Q2.3
select sum(lo_revenue) as lo_revenue, d_year, p_brand
from lineorder
join dates on lo_orderdate = d_datekey
join part on lo_partkey = p_partkey
join supplier on lo_suppkey = s_suppkey
where p_brand = 'MFGR#2239' and s_region = 'EUROPE'
group by d_year, p_brand
order by d_year, p_brand;
--Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_region = 'ASIA' and s_region = 'ASIA'and d_year >= 1992 and d_year <= 1997
group by c_nation, s_nation, d_year
order by d_year asc, lo_revenue desc;
--Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where c_nation = 'UNITED STATES' and s_nation = 'UNITED STATES'
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_year >= 1992 and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q3.4
select c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
where (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997'
group by c_city, s_city, d_year
order by d_year asc, lo_revenue desc;
--Q4.1
select d_year, c_nation, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA' and s_region = 'AMERICA' and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, c_nation
order by d_year, c_nation;
--Q4.2
select d_year, s_nation, p_category, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_region = 'AMERICA'
and (d_year = 1997 or d_year = 1998)
and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
group by d_year, s_nation, p_category
order by d_year, s_nation, p_category;
--Q4.3
select d_year, s_city, p_brand, sum(lo_revenue) - sum(lo_supplycost) as profit
from lineorder
join dates on lo_orderdate = d_datekey
join customer on lo_custkey = c_custkey
join supplier on lo_suppkey = s_suppkey
join part on lo_partkey = p_partkey
where c_region = 'AMERICA'and s_nation = 'UNITED STATES'
and (d_year = 1997 or d_year = 1998)
and p_category = 'MFGR#14'
group by d_year, s_city, p_brand
order by d_year, s_city, p_brand;
3.1、多表查询结果
4、执行标准SQL-单表查询
--Q1.1
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE LO_ORDERDATE >= 19930101 and LO_ORDERDATE <= 19931231 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19940131 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;
--Q1.3
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE weekofyear(LO_ORDERDATE) = 6 AND LO_ORDERDATE >= 19940101 and LO_ORDERDATE <= 19941231
AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;
--Q2.1
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q2.2
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q2.3
SELECT
sum(LO_REVENUE),
(LO_ORDERDATE DIV 10000) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
--Q3.1
SELECT
C_NATION,
S_NATION,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_NATION,
S_NATION,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.2
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.3
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND S_CITY in ( 'UNITED KI1' ,'UNITED KI5') AND LO_ORDERDATE >= 19920101 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q3.4
SELECT
C_CITY,
S_CITY,
(LO_ORDERDATE DIV 10000) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE C_CITY in ('UNITED KI1', 'UNITED KI5') AND S_CITY in ( 'UNITED KI1', 'UNITED KI5') AND LO_ORDERDATE >= 19971201 AND LO_ORDERDATE <= 19971231
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
--Q4.1
SELECT
(LO_ORDERDATE DIV 10000) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
--Q4.2
SELECT
(LO_ORDERDATE DIV 10000) AS year,
S_NATION,
P_CATEGORY,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
year,
S_NATION,
P_CATEGORY
ORDER BY
year ASC,
S_NATION ASC,
P_CATEGORY ASC;
--Q4.3
SELECT
(LO_ORDERDATE DIV 10000) AS year,
S_CITY,
P_BRAND,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE S_NATION = 'UNITED STATES' AND LO_ORDERDATE >= 19970101 and LO_ORDERDATE <= 19981231 AND P_CATEGORY = 'MFGR#14'
GROUP BY
year,
S_CITY,
P_BRAND
ORDER BY
year ASC,
S_CITY ASC,
P_BRAND ASC;