Clickhouse分片集群性能测试

Clickhouse分片集群性能测试

测试环境

clickhouse版本:21.11.4.14

硬件环境:

云主机CPUMem数据盘OS
ck-14c16G150Gcentos7.8
ck-24c16G150Gcentos7.8
ck-34c16G150Gcentos7.8
ck-44c16G150Gcentos7.8

数据源:

Star Schema Benchmark | ClickHouse文档

生成数据

使用-s 100dbgen 将生成 6 亿行数据(67GB), 如果使用-s 1000它会生成 60 亿行数据(这需要很多时间))

$ ./dbgen -s 100 -T c
$ ./dbgen -s 100 -T l
$ ./dbgen -s 100 -T p
$ ./dbgen -s 100 -T s

生成lineorder数据集的时候会报错:File size limit exceeded (core dumped)
临时增加file size: ulimit -f 102400000再生成一次就可以了

集群配置

    <clickhouse_remote_servers>
        <two_shards_2rep>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ck-1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ck-2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ck-3</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ck-4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </two_shards_2rep>
    </clickhouse_remote_servers>

建表

CREATE DATABASE data_share on cluster two_shards_2rep;

# 副本表
CREATE TABLE data_share.customer on cluster two_shards_2rep
(
        C_CUSTKEY       UInt32,
        C_NAME          String,
        C_ADDRESS       String,
        C_CITY          LowCardinality(String),
        C_NATION        LowCardinality(String),
        C_REGION        LowCardinality(String),
        C_PHONE         String,
        C_MKTSEGMENT    LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/customer','{replica}') 
ORDER BY (C_CUSTKEY);

#分布式表
CREATE TABLE data_share.customer_all on cluster two_shards_2rep
(
        C_CUSTKEY       UInt32,
        C_NAME          String,
        C_ADDRESS       String,
        C_CITY          LowCardinality(String),
        C_NATION        LowCardinality(String),
        C_REGION        LowCardinality(String),
        C_PHONE         String,
        C_MKTSEGMENT    LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,customer,hiveHash(C_CUSTKEY));


CREATE TABLE data_share.lineorder on cluster two_shards_2rep
(
    LO_ORDERKEY             UInt32,
    LO_LINENUMBER           UInt8,
    LO_CUSTKEY              UInt32,
    LO_PARTKEY              UInt32,
    LO_SUPPKEY              UInt32,
    LO_ORDERDATE            Date,
    LO_ORDERPRIORITY        LowCardinality(String),
    LO_SHIPPRIORITY         UInt8,
    LO_QUANTITY             UInt8,
    LO_EXTENDEDPRICE        UInt32,
    LO_ORDTOTALPRICE        UInt32,
    LO_DISCOUNT             UInt8,
    LO_REVENUE              UInt32,
    LO_SUPPLYCOST           UInt32,
    LO_TAX                  UInt8,
    LO_COMMITDATE           Date,
    LO_SHIPMODE             LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder','{replica}') 
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY);

CREATE TABLE data_share.lineorder_all on cluster two_shards_2rep
(
    LO_ORDERKEY             UInt32,
    LO_LINENUMBER           UInt8,
    LO_CUSTKEY              UInt32,
    LO_PARTKEY              UInt32,
    LO_SUPPKEY              UInt32,
    LO_ORDERDATE            Date,
    LO_ORDERPRIORITY        LowCardinality(String),
    LO_SHIPPRIORITY         UInt8,
    LO_QUANTITY             UInt8,
    LO_EXTENDEDPRICE        UInt32,
    LO_ORDTOTALPRICE        UInt32,
    LO_DISCOUNT             UInt8,
    LO_REVENUE              UInt32,
    LO_SUPPLYCOST           UInt32,
    LO_TAX                  UInt8,
    LO_COMMITDATE           Date,
    LO_SHIPMODE             LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,lineorder,hiveHash(LO_ORDERKEY));

CREATE TABLE data_share.part on cluster two_shards_2rep
(
        P_PARTKEY       UInt32,
        P_NAME          String,
        P_MFGR          LowCardinality(String),
        P_CATEGORY      LowCardinality(String),
        P_BRAND         LowCardinality(String),
        P_COLOR         LowCardinality(String),
        P_TYPE          LowCardinality(String),
        P_SIZE          UInt8,
        P_CONTAINER     LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/part','{replica}') 
ORDER BY P_PARTKEY;

CREATE TABLE data_share.part_all on cluster two_shards_2rep
(
        P_PARTKEY       UInt32,
        P_NAME          String,
        P_MFGR          LowCardinality(String),
        P_CATEGORY      LowCardinality(String),
        P_BRAND         LowCardinality(String),
        P_COLOR         LowCardinality(String),
        P_TYPE          LowCardinality(String),
        P_SIZE          UInt8,
        P_CONTAINER     LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,part,hiveHash(P_PARTKEY));

CREATE TABLE data_share.supplier on cluster two_shards_2rep
(
        S_SUPPKEY       UInt32,
        S_NAME          String,
        S_ADDRESS       String,
        S_CITY          LowCardinality(String),
        S_NATION        LowCardinality(String),
        S_REGION        LowCardinality(String),
        S_PHONE         String
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/supplier','{replica}') 
ORDER BY S_SUPPKEY;

CREATE TABLE data_share.supplier_all on cluster two_shards_2rep
(
        S_SUPPKEY       UInt32,
        S_NAME          String,
        S_ADDRESS       String,
        S_CITY          LowCardinality(String),
        S_NATION        LowCardinality(String),
        S_REGION        LowCardinality(String),
        S_PHONE         String
)
ENGINE = Distributed(two_shards_2rep,data_share,supplier,hiveHash(S_SUPPKEY));

写入数据

$ clickhouse-client --query "INSERT INTO data_share.customer_all FORMAT CSV" < customer.tbl
$ clickhouse-client --query "INSERT INTO data_share.part FORMAT CSV" < part.tbl
$ clickhouse-client --query "INSERT INTO data_share.supplier FORMAT CSV" < supplier.tbl
$ clickhouse-client --query "INSERT INTO data_share.lineorder FORMAT CSV" < lineorder.tbl

建立宽表

SET max_memory_usage = 20000000000;

CREATE TABLE data_share.lineorder_flat on cluster two_shards_2rep
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder_flat','{replica}')
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
    l.LO_ORDERKEY AS LO_ORDERKEY,
    l.LO_LINENUMBER AS LO_LINENUMBER,
    l.LO_CUSTKEY AS LO_CUSTKEY,
    l.LO_PARTKEY AS LO_PARTKEY,
    l.LO_SUPPKEY AS LO_SUPPKEY,
    l.LO_ORDERDATE AS LO_ORDERDATE,
    l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
    l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
    l.LO_QUANTITY AS LO_QUANTITY,
    l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
    l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
    l.LO_DISCOUNT AS LO_DISCOUNT,
    l.LO_REVENUE AS LO_REVENUE,
    l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
    l.LO_TAX AS LO_TAX,
    l.LO_COMMITDATE AS LO_COMMITDATE,
    l.LO_SHIPMODE AS LO_SHIPMODE,
    c.C_NAME AS C_NAME,
    c.C_ADDRESS AS C_ADDRESS,
    c.C_CITY AS C_CITY,
    c.C_NATION AS C_NATION,
    c.C_REGION AS C_REGION,
    c.C_PHONE AS C_PHONE,
    c.C_MKTSEGMENT AS C_MKTSEGMENT,
    s.S_NAME AS S_NAME,
    s.S_ADDRESS AS S_ADDRESS,
    s.S_CITY AS S_CITY,
    s.S_NATION AS S_NATION,
    s.S_REGION AS S_REGION,
    s.S_PHONE AS S_PHONE,
    p.P_NAME AS P_NAME,
    p.P_MFGR AS P_MFGR,
    p.P_CATEGORY AS P_CATEGORY,
    p.P_BRAND AS P_BRAND,
    p.P_COLOR AS P_COLOR,
    p.P_TYPE AS P_TYPE,
    p.P_SIZE AS P_SIZE,
    p.P_CONTAINER AS P_CONTAINER
FROM data_share.lineorder AS l
INNER JOIN data_share.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN data_share.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN data_share.part AS p ON p.P_PARTKEY = l.LO_PARTKEY;

测试报告

写入测试

导入lineorder 表的6亿条数据耗时约20分钟;平均50w条/s;
查询:count(*)

┌───count()─┐
│ 600037902 │
└───────────┘
耗时: 0.005 sec.
分片1(ck1-ck2)
ck-1在这里插入图片描述
分片2(ck3-ck4)
在这里插入图片描述在这里插入图片描述

建立宽表

SET max_memory_usage = 20000000000;

CREATE TABLE data_share.lineorder_flat on cluster two_shards_2rep
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder_flat','{replica}')
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
    l.LO_ORDERKEY AS LO_ORDERKEY,
    l.LO_LINENUMBER AS LO_LINENUMBER,
    l.LO_CUSTKEY AS LO_CUSTKEY,
    l.LO_PARTKEY AS LO_PARTKEY,
    l.LO_SUPPKEY AS LO_SUPPKEY,
    l.LO_ORDERDATE AS LO_ORDERDATE,
    l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
    l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
    l.LO_QUANTITY AS LO_QUANTITY,
    l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
    l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
    l.LO_DISCOUNT AS LO_DISCOUNT,
    l.LO_REVENUE AS LO_REVENUE,
    l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
    l.LO_TAX AS LO_TAX,
    l.LO_COMMITDATE AS LO_COMMITDATE,
    l.LO_SHIPMODE AS LO_SHIPMODE,
    c.C_NAME AS C_NAME,
    c.C_ADDRESS AS C_ADDRESS,
    c.C_CITY AS C_CITY,
    c.C_NATION AS C_NATION,
    c.C_REGION AS C_REGION,
    c.C_PHONE AS C_PHONE,
    c.C_MKTSEGMENT AS C_MKTSEGMENT,
    s.S_NAME AS S_NAME,
    s.S_ADDRESS AS S_ADDRESS,
    s.S_CITY AS S_CITY,
    s.S_NATION AS S_NATION,
    s.S_REGION AS S_REGION,
    s.S_PHONE AS S_PHONE,
    p.P_NAME AS P_NAME,
    p.P_MFGR AS P_MFGR,
    p.P_CATEGORY AS P_CATEGORY,
    p.P_BRAND AS P_BRAND,
    p.P_COLOR AS P_COLOR,
    p.P_TYPE AS P_TYPE,
    p.P_SIZE AS P_SIZE,
    p.P_CONTAINER AS P_CONTAINER
FROM data_share.lineorder AS l
INNER JOIN data_share.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN data_share.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN data_share.part AS p ON p.P_PARTKEY = l.LO_PARTKEY;

汇总数据到flat表,耗时3分钟;
此时的系统性能报告如下:
在这里插入图片描述

查询效率测试

# Count(*):
┌──count()─┐
│ 66543963 │
└──────────┘
耗时: 0.002 sec.

# 查询1:
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
┌───────revenue─┐
│ 5579054405072 │
└───────────────┘
Elapsed: 0.051 sec. Processed 11.38 million rows, 91.04 MB (222.39 million rows/s., 1.78 GB/s.)

# 查询2:
SELECT
    sum(LO_REVENUE),
    toYear(LO_ORDERDATE) AS year,
    P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
    year,
    P_BRAND
ORDER BY
    year,
    P_BRAND;

┌─sum(LO_REVENUE)─┬─year─┬─P_BRAND───┐
│      87327530821992 │ MFGR#121  │82980598981992 │ MFGR#1210 │86548407441992 │ MFGR#1211 │82814524481992 │ MFGR#1212 │78727472121992 │ MFGR#1213 │80674993261992 │ MFGR#1214 │85330064181992 │ MFGR#1215 │86662149041992 │ MFGR#1216 │82435858061992 │ MFGR#1217 │
| ......		  | .... | .... 	 |48900449621998 │ MFGR#125  │41988870881998 │ MFGR#126  │50639312181998 │ MFGR#127  │51623165441998 │ MFGR#128  │44964959321998 │ MFGR#129  │
└─────────────────┴──────┴───────────┘

280 rows in set. Elapsed: 2.536 sec. Processed 74.66 million rows, 773.05 MB (29.43 million rows/s., 304.77 MB/s.)

# 查询3:
SELECT
    C_CITY,
    S_CITY,
    toYear(LO_ORDERDATE) AS year,
    sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997
GROUP BY
    C_CITY,
    S_CITY,
    year
ORDER BY
    year ASC,
    revenue DESC;

┌─C_CITY─────┬─S_CITY─────┬─year─┬───revenue─┐
│ UNITED KI1 │ UNITED KI1 │ 1992819803234 │
│ UNITED KI5 │ UNITED KI5 │ 1992777564246 │
│ UNITED KI5 │ UNITED KI1 │ 1992683287196 │
│ UNITED KI1 │ UNITED KI5 │ 1992586031246 │
│ UNITED KI1 │ UNITED KI1 │ 1993828457936 │
│ UNITED KI1 │ UNITED KI5 │ 1993743591938 │
│ UNITED KI5 │ UNITED KI5 │ 1993615734476 │
│ UNITED KI5 │ UNITED KI1 │ 1993579175942 │
│ UNITED KI1 │ UNITED KI5 │ 1994770470484 │
│ UNITED KI1 │ UNITED KI1 │ 1994712789452 │
│ UNITED KI5 │ UNITED KI1 │ 1994655200664 │
│ UNITED KI5 │ UNITED KI5 │ 1994583152106 │
│ UNITED KI1 │ UNITED KI1 │ 1995733116632 │
│ UNITED KI5 │ UNITED KI1 │ 1995723067936 │
│ UNITED KI5 │ UNITED KI5 │ 1995703419120 │
│ UNITED KI1 │ UNITED KI5 │ 1995594012920 │
│ UNITED KI1 │ UNITED KI5 │ 1996740211016 │
│ UNITED KI1 │ UNITED KI1 │ 1996653797052 │
│ UNITED KI5 │ UNITED KI1 │ 1996648050389 │
│ UNITED KI5 │ UNITED KI5 │ 1996594341338 │
│ UNITED KI1 │ UNITED KI1 │ 1997753622992 │
│ UNITED KI5 │ UNITED KI5 │ 1997730044290 │
│ UNITED KI5 │ UNITED KI1 │ 1997662865918 │
│ UNITED KI1 │ UNITED KI5 │ 1997649905990 │
└────────────┴────────────┴──────┴───────────┘

24 rows in set. Elapsed: 0.283 sec. Processed 67.99 million rows, 556.43 MB (240.42 million rows/s., 1.97 GB/s.)

# 查询4:
SELECT
    toYear(LO_ORDERDATE) AS year,
    C_NATION,
    sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY
    year,
    C_NATION
ORDER BY
    year ASC,
    C_NATION ASC;
┌─year─┬─C_NATION──────┬───────profit─┐
│ 1992 │ ARGENTINA     │ 129264034732 │
│ 1992 │ BRAZIL        │ 129190275718 │
│ 1992 │ CANADA        │ 127999498130 │
│ 1992 │ PERU          │ 130008432234 │
│ 1992 │ UNITED STATES │ 128121512564 │
│ 1993 │ ARGENTINA     │ 129045810754 │
│ 1993 │ BRAZIL        │ 129272278766 │
│ 1993 │ CANADA        │ 129201057096 │
│ 1993 │ PERU          │ 127234258294 │
│ 1993 │ UNITED STATES │ 127625951768 │
│ 1994 │ ARGENTINA     │ 128315941790 │
│ 1994 │ BRAZIL        │ 129750923128 │
│ 1994 │ CANADA        │ 128931983308 │
│ 1994 │ PERU          │ 127569079104 │
│ 1994 │ UNITED STATES │ 128950639058 │
│ 1995 │ ARGENTINA     │ 128653868430 │
│ 1995 │ BRAZIL        │ 129147797904 │
│ 1995 │ CANADA        │ 129487283214 │
│ 1995 │ PERU          │ 127897351820 │
│ 1995 │ UNITED STATES │ 128060931298 │
│ 1996 │ ARGENTINA     │ 125437970016 │
│ 1996 │ BRAZIL        │ 124507623177 │
│ 1996 │ CANADA        │ 124286938916 │
│ 1996 │ PERU          │ 124366141740 │
│ 1996 │ UNITED STATES │ 123662006907 │
│ 1997 │ ARGENTINA     │ 128124640660 │
│ 1997 │ BRAZIL        │ 129806833966 │
│ 1997 │ CANADA        │ 127734326768 │
│ 1997 │ PERU          │ 129729837034 │
│ 1997 │ UNITED STATES │ 128388371012 │
│ 1998 │ ARGENTINA     │  75667522918 │
│ 1998 │ BRAZIL        │  76089525024 │
│ 1998 │ CANADA        │  75113316030 │
│ 1998 │ PERU          │  76322655284 │
│ 1998 │ UNITED STATES │  75648021690 │
└──────┴───────────────┴──────────────┘

35 rows in set. Elapsed: 1.647 sec. Processed 74.66 million rows, 1.05 GB (45.33 million rows/s., 635.15 MB/s.)

好了,虽然这是比较粗略的测试,很多地方都不太严谨,但是从测试结果和系统的整体性能的使用情况来看,clickhouse真的很强!后续我在使用过程中要是有新的分享会再次的分享给大家的,感谢您的阅读。😃

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值