TPC-H基准测试

前言

如果说让你比较一下两个olap引擎的查询性能怎么样,你首先想到的测试方式是什么?导入部分数据,写几个sql测试一下,那该写什么样的sql呢?测试比较权威的测试方式就出来,那就是今天讲的TPC-H基准测试。

TPC-H基准测试

此测试提供数据,而且是根据你的需求进行生产不同数量级的数据,同时提供22条sql,包含简单查询、排序、分组、关联、子查询等表达方式

安装

https://download.csdn.net/download/jklcl/47667939
不需要C币,如果csdn设置成需要的,可以给我留言,再私发

# 1.解压
unzip tpch.zip

# 2.配置
cd tpch/dbgen/
cp makefile.suite makefile
# 生成ORACLE数据库的脚本和数据
# 主要修改以下参数值
vim conf/makefile
-----
CC = gcc
DATABASE= ORACLE
MACHINE = LINUX
WORKLOAD = TPCH
-----

# 3. 生成执行命令,生成dbgen
make --生成dbgen
./dbgen -s 10 -f --生成10GB数据

# 4. 此时生产8个*.tbl的文件,转移到其他目录下
mv ./*.tbl /tmp/file/data5g

在这里插入图片描述

导入hive

Create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT,L_SUPPKEY INT,L_LINENUMBER INT,L_QUANTITY DOUBLE,L_EXTENDEDPRICE DOUBLE,L_DISCOUNT DOUBLE,L_TAX DOUBLE,L_RETURNFLAG STRING,L_LINESTATUS STRING,L_SHIPDATE STRING,L_COMMITDATE STRING,L_RECEIPTDATE STRING,L_SHIPINSTRUCT STRING,L_SHIPMODE STRING,L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tmp/lineitem';
Create external table nation (N_NATIONKEY INT,N_NAME STRING,N_REGIONKEY INT,N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/nation';
Create external table region (R_REGIONKEY INT,R_NAME STRING,R_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/region';
Create external table part (P_PARTKEY INT , P_NAME STRING , P_MFGR STRING, P_BRAND  STRING, P_TYPE STRING , P_SIZE INT , P_CONTAINER  STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/part';
Create external table supplier (S_SUPPKEY INT , S_NAME STRING ,S_ADDRESS STRING , S_NATIONKEY  INT , S_PHONE  STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/supplier';
Create external table partsupp (PS_PARTKEY INT , PS_SUPPKEY INT , PS_AVAILQTY INT , PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/partsupp';
Create external table customer (C_CUSTKEY INT , C_NAME STRING , C_ADDRESS STRING , C_NATIONKEY  INT , C_PHONE  STRING , C_ACCTBAL DOUBLE , C_MKTSEGMENT STRING , C_COMMENT STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/customer';
Create external table orders (O_ORDERKEY INT , O_CUSTKEY INT ,O_ORDERSTATUS STRING,O_TOTALPRICE DOUBLE, O_ORDERDATE DATE , O_ORDERPRIORITY STRING, O_CLERK  STRING,  O_SHIPPRIORITY  INT ,  O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/orders';

LOAD DATA LOCAL INPATH '/tmp/file/data5g/customer.tbl' OVERWRITE INTO TABLE customer;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/region.tbl' OVERWRITE INTO TABLE region;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/nation.tbl' OVERWRITE INTO TABLE nation;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/part.tbl'   OVERWRITE INTO TABLE part;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/supplier.tbl' OVERWRITE INTO TABLE supplier;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/partsupp.tbl' OVERWRITE INTO TABLE partsupp;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/orders.tbl' OVERWRITE INTO TABLE orders;
LOAD DATA LOCAL INPATH '/tmp/file/data5g/lineitem.tbl' OVERWRITE INTO TABLE lineitem;

SQL

=========================================================1
select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        avg(l_quantity) as avg_qty,
        avg(l_extendedprice) as avg_price,
        avg(l_discount) as avg_disc,
        count(*) as count_order
from
        tpch.lineitem
where
        l_shipdate <= date '1998-12-01' - interval '120' day
group by
        l_returnflag,
        l_linestatus
order by
        l_returnflag,
        l_linestatus;

=========================================================2
select
        s_acctbal,
        s_name,
        n_name,
        p_partkey,
        p_mfgr,
        s_address,
        s_phone,
        s_comment
from
        tpch.part,
        tpch.supplier,
        tpch.partsupp,
        tpch.nation,
        tpch.region
where
        p_partkey = ps_partkey
        and s_suppkey = ps_suppkey
        and p_size = 48
        and p_type like '%STEEL'
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'EUROPE'
        and ps_supplycost = (
                select
                        min(ps_supplycost)
                from
                        tpch.partsupp,
                        tpch.supplier,
                        tpch.nation,
                        tpch.region
                where
                        p_partkey = ps_partkey
                        and s_suppkey = ps_suppkey
                        and s_nationkey = n_nationkey
                        and n_regionkey = r_regionkey
                        and r_name = 'EUROPE'
        )
order by
        s_acctbal desc,
        n_name,
        s_name,
        p_partkey
limit 100;


=========================================================3
select
        l_orderkey,
        sum(l_extendedprice * (1 - l_discount)) as revenue,
        o_orderdate,
        o_shippriority
from
        tpch.customer,
        tpch.orders,
        tpch.lineitem
where
        c_mktsegment = 'MACHINERY'
        and c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate < date '1995-03-23'
        and l_shipdate > date '1995-03-23'
group by
        l_orderkey,
        o_orderdate,
        o_shippriority
order by
        revenue desc,
        o_orderdate
limit 10;
=========================================================4
select
        o_orderpriority,
        count(*) as order_count
from
        tpch.orders
where
        o_orderdate >= date '1996-07-01'
        and o_orderdate < date '1996-07-01' + interval '3' month
        and exists (
                select
                        *
                from
                        tpch.lineitem
                where
                        l_orderkey = o_orderkey
                        and l_commitdate < l_receiptdate
        )
group by
        o_orderpriority
order by
        o_orderpriority;

=========================================================5
select
        n_name,
        sum(l_extendedprice * (1 - l_discount)) as revenue
from
        tpch.customer,
        tpch.orders,
        tpch.lineitem,
        tpch.supplier,
        tpch.nation,
        tpch.region
where
        c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and l_suppkey = s_suppkey
        and c_nationkey = s_nationkey
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'EUROPE'
        and o_orderdate >= date '1996-01-01'
        and o_orderdate < date '1996-01-01' + interval '1' year
group by
        n_name
order by
        revenue desc;

=========================================================6
select
        sum(l_extendedprice * l_discount) as revenue
from
        tpch.lineitem
where
        l_shipdate >= date '1996-01-01'
        and l_shipdate < date '1996-01-01' + interval '1' year
        and l_discount between 0.02 - 0.01 and 0.02 + 0.01
        and l_quantity < 24;

=========================================================7
select
        supp_nation,
        cust_nation,
        l_year,
        sum(volume) as revenue
from
        (
                select
                        n1.n_name as supp_nation,
                        n2.n_name as cust_nation,
                        extract(year from l_shipdate) as l_year,
                        l_extendedprice * (1 - l_discount) as volume
                from
                        tpch.supplier,
                        tpch.lineitem,
                        tpch.orders,
                        tpch.customer,
                        tpch.nation n1,
                        tpch.nation n2
                where
                        s_suppkey = l_suppkey
                        and o_orderkey = l_orderkey
                        and c_custkey = o_custkey
                        and s_nationkey = n1.n_nationkey
                        and c_nationkey = n2.n_nationkey
                        and (
                                (n1.n_name = 'CANADA' and n2.n_name = 'BRAZIL')
                                or (n1.n_name = 'BRAZIL' and n2.n_name = 'CANADA')
                        )
                        and l_shipdate between date '1995-01-01' and date '1996-12-31'
        ) as shipping
group by
        supp_nation,
        cust_nation,
        l_year
order by
        supp_nation,
        cust_nation,
        l_year;

=========================================================8
select
        o_year,
        sum(case
                when nation = 'BRAZIL' then volume
                else 0
        end) / sum(volume) as mkt_share
from
        (
                select
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) as volume,
                        n2.n_name as nation
                from
                        tpch.part,
                        tpch.supplier,
                        tpch.lineitem,
                        tpch.orders,
                        tpch.customer,
                        tpch.nation n1,
                        tpch.nation n2,
                        tpch.region
                where
                        p_partkey = l_partkey
                        and s_suppkey = l_suppkey
                        and l_orderkey = o_orderkey
                        and o_custkey = c_custkey
                        and c_nationkey = n1.n_nationkey
                        and n1.n_regionkey = r_regionkey
                        and r_name = 'AMERICA'
                        and s_nationkey = n2.n_nationkey
                        and o_orderdate between date '1995-01-01' and date '1996-12-31'
                        and p_type = 'LARGE ANODIZED COPPER'
        ) as all_nations
group by
        o_year
order by
        o_year;

=========================================================9
select
        nation,
        o_year,
        sum(amount) as sum_profit
from
        (
                select
                        n_name as nation,
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
                from
                        tpch.part,
                        tpch.supplier,
                        tpch.lineitem,
                        tpch.partsupp,
                        tpch.orders,
                        tpch.nation
                where
                        s_suppkey = l_suppkey
                        and ps_suppkey = l_suppkey
                        and ps_partkey = l_partkey
                        and p_partkey = l_partkey
                        and o_orderkey = l_orderkey
                        and s_nationkey = n_nationkey
                        and p_name like '%maroon%'
        ) as profit
group by
        nation,
        o_year
order by
        nation,
        o_year desc;

=========================================================10
select
        c_custkey,
        c_name,
        sum(l_extendedprice * (1 - l_discount)) as revenue,
        c_acctbal,
        n_name,
        c_address,
        c_phone,
        c_comment
from
        tpch.customer,
        tpch.orders,
        tpch.lineitem,
        tpch.nation
where
        c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate >= date '1993-02-01'
        and o_orderdate < date '1993-02-01' + interval '3' month
        and l_returnflag = 'R'
        and c_nationkey = n_nationkey
group by
        c_custkey,
        c_name,
        c_acctbal,
        c_phone,
        n_name,
        c_address,
        c_comment
order by
        revenue desc
limit 20;

=========================================================11

select
        ps_partkey,
        sum(ps_supplycost * ps_availqty) as value
from
        tpch.partsupp,
        tpch.supplier,
        tpch.nation
where
        ps_suppkey = s_suppkey
        and s_nationkey = n_nationkey
        and n_name = 'EGYPT'
group by
        ps_partkey having
                sum(ps_supplycost * ps_availqty) > (
                        select
                                sum(ps_supplycost * ps_availqty) * 0.0001000000
                        from
                                tpch.partsupp,
                                tpch.supplier,
                                tpch.nation
                        where
                                ps_suppkey = s_suppkey
                                and s_nationkey = n_nationkey
                                and n_name = 'EGYPT'
                )
order by
        value desc;

=========================================================12
select
        l_shipmode,
        sum(case
                when o_orderpriority = '1-URGENT'
                        or o_orderpriority = '2-HIGH'
                        then 1
                else 0
        end) as high_line_count,
        sum(case
                when o_orderpriority <> '1-URGENT'
                        and o_orderpriority <> '2-HIGH'
                        then 1
                else 0
        end) as low_line_count
from
        tpch.orders,
        tpch.lineitem
where
        o_orderkey = l_orderkey
        and l_shipmode in ('FOB', 'AIR')
        and l_commitdate < l_receiptdate
        and l_shipdate < l_commitdate
        and l_receiptdate >= date '1997-01-01'
        and l_receiptdate < date '1997-01-01' + interval '1' year
group by
        l_shipmode
order by
        l_shipmode;
=========================================================13
select
        c_count,
        count(*) as custdist
from
        (
                select
                        c_custkey,
                        count(o_orderkey) as c_count
                from
                        tpch.customer left outer join tpch.orders on
                                c_custkey = o_custkey
                                and o_comment not like '%special%deposits%'
                group by
                        c_custkey
        ) c_orders
group by
        c_count
order by
        custdist desc,
        c_count desc;

=========================================================14
select
        100.00 * sum(case
                when p_type like 'PROMO%'
                        then l_extendedprice * (1 - l_discount)
                else 0
        end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
        tpch.lineitem,
        tpch.part
where
        l_partkey = p_partkey
        and l_shipdate >= date '1997-06-01'
        and l_shipdate < date '1997-06-01' + interval '1' month;

=========================================================16
select
        p_brand,
        p_type,
        p_size,
        count(distinct ps_suppkey) as supplier_cnt
from
        tpch.partsupp,
        tpch.part
where
        p_partkey = ps_partkey
        and p_brand <> 'Brand#45'
        and p_type not like 'SMALL ANODIZED%'
        and p_size in (47, 15, 37, 30, 46, 16, 18, 6)
        and ps_suppkey not in (
                select
                        s_suppkey
                from
                        tpch.supplier
                where
                        s_comment like '%Customer%Complaints%'
        )
group by
        p_brand,
        p_type,
        p_size
order by
        supplier_cnt desc,
        p_brand,
        p_type,
        p_size;

=========================================================17
select
        sum(l_extendedprice) / 7.0 as avg_yearly
from
        tpch.lineitem,
        tpch.part
where
        p_partkey = l_partkey
        and p_brand = 'Brand#51'
        and p_container = 'WRAP PACK'
        and l_quantity < (
                select
                        0.2 * avg(l_quantity)
                from
                        tpch.lineitem
                where
                        l_partkey = p_partkey
        );


=========================================================18
select
        c_name,
        c_custkey,
        o_orderkey,
        o_orderdate,
        o_totalprice,
        sum(l_quantity)
from
        tpch.customer,
        tpch.orders,
        tpch.lineitem
where
        o_orderkey in (
                select
                        l_orderkey
                from
                        tpch.lineitem
                group by
                        l_orderkey having
                                sum(l_quantity) > 312
        )
        and c_custkey = o_custkey
        and o_orderkey = l_orderkey
group by
        c_name,
        c_custkey,
        o_orderkey,
        o_orderdate,
        o_totalprice
order by
        o_totalprice desc,
        o_orderdate
limit 100;

=========================================================19
select
        sum(l_extendedprice* (1 - l_discount)) as revenue
from
        tpch.lineitem,
        tpch.part
where
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#52'
                and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
                and l_quantity >= 3 and l_quantity <= 3 + 10
                and p_size between 1 and 5
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        )
        or
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#43'
                and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
                and l_quantity >= 12 and l_quantity <= 12 + 10
                and p_size between 1 and 10
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        )
        or
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#52'
                and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
                and l_quantity >= 21 and l_quantity <= 21 + 10
                and p_size between 1 and 15
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        );

=========================================================20
select
        s_name,
        s_address
from
        tpch.supplier,
        tpch.nation
where
        s_suppkey in (
                select
                        ps_suppkey
                from
                        tpch.partsupp
                where
                        ps_partkey in (
                                select
                                        p_partkey
                                from
                                        tpch.part
                                where
                                        p_name like 'drab%'
                        )
                        and ps_availqty > (
                                select
                                        0.5 * sum(l_quantity)
                                from
                                        tpch.lineitem
                                where
                                        l_partkey = ps_partkey
                                        and l_suppkey = ps_suppkey
                                        and l_shipdate >= date '1996-01-01'
                                        and l_shipdate < date '1996-01-01' + interval '1' year
                        )
        )
        and s_nationkey = n_nationkey
        and n_name = 'KENYA'
order by
        s_name;

=========================================================21
select
        s_name,
        count(*) as numwait
from
        tpch.supplier,
        tpch.lineitem l1,
        tpch.orders,
        tpch.nation
where
        s_suppkey = l1.l_suppkey
        and o_orderkey = l1.l_orderkey
        and o_orderstatus = 'F'
        and l1.l_receiptdate > l1.l_commitdate
        and exists (
                select
                        *
                from
                        tpch.lineitem l2
                where
                        l2.l_orderkey = l1.l_orderkey
                        and l2.l_suppkey <> l1.l_suppkey
        )
        and not exists (
                select
                        *
                from
                        tpch.lineitem l3
                where
                        l3.l_orderkey = l1.l_orderkey
                        and l3.l_suppkey <> l1.l_suppkey
                        and l3.l_receiptdate > l3.l_commitdate
        )
        and s_nationkey = n_nationkey
        and n_name = 'PERU'
group by
        s_name
order by
        numwait desc,
        s_name
limit 100;


=========================================================22

select
        cntrycode,
        count(*) as numcust,
        sum(c_acctbal) as totacctbal
from
        (
                select
                        substring(c_phone from 1 for 2) as cntrycode,
                        c_acctbal
                from
                        tpch.customer
                where
                        substring(c_phone from 1 for 2) in
                                ('24', '32', '17', '18', '12', '14', '22')
                        and c_acctbal > (
                                select
                                        avg(c_acctbal)
                                from
                                        tpch.customer
                                where
                                        c_acctbal > 0.00
                                        and substring(c_phone from 1 for 2) in
                                                ('24', '32', '17', '18', '12', '14', '22')
                        )
                        and not exists (
                                select
                                        *
                                from
                                        tpch.orders
                                where
                                        o_custkey = c_custkey
                        )
        ) as custsale
group by
        cntrycode
order by
        cntrycode;


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值