Clickhouse分片集群性能测试
测试环境
clickhouse版本:21.11.4.14
硬件环境:
云主机 | CPU | Mem | 数据盘 | OS |
---|---|---|---|---|
ck-1 | 4c | 16G | 150G | centos7.8 |
ck-2 | 4c | 16G | 150G | centos7.8 |
ck-3 | 4c | 16G | 150G | centos7.8 |
ck-4 | 4c | 16G | 150G | centos7.8 |
数据源:
Star Schema Benchmark | ClickHouse文档
生成数据
使用-s 100
dbgen 将生成 6 亿行数据(67GB), 如果使用-s 1000
它会生成 60 亿行数据(这需要很多时间))
$ ./dbgen -s 100 -T c
$ ./dbgen -s 100 -T l
$ ./dbgen -s 100 -T p
$ ./dbgen -s 100 -T s
生成lineorder数据集的时候会报错:File size limit exceeded (core dumped)
临时增加file size: ulimit -f 102400000再生成一次就可以了
集群配置
<clickhouse_remote_servers>
<two_shards_2rep>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>ck-1</host>
<port>9000</port>
</replica>
<replica>
<host>ck-2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>ck-3</host>
<port>9000</port>
</replica>
<replica>
<host>ck-4</host>
<port>9000</port>
</replica>
</shard>
</two_shards_2rep>
</clickhouse_remote_servers>
建表
CREATE DATABASE data_share on cluster two_shards_2rep;
# 副本表
CREATE TABLE data_share.customer on cluster two_shards_2rep
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/customer','{replica}')
ORDER BY (C_CUSTKEY);
#分布式表
CREATE TABLE data_share.customer_all on cluster two_shards_2rep
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,customer,hiveHash(C_CUSTKEY));
CREATE TABLE data_share.lineorder on cluster two_shards_2rep
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder','{replica}')
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE data_share.lineorder_all on cluster two_shards_2rep
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,lineorder,hiveHash(LO_ORDERKEY));
CREATE TABLE data_share.part on cluster two_shards_2rep
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/part','{replica}')
ORDER BY P_PARTKEY;
CREATE TABLE data_share.part_all on cluster two_shards_2rep
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = Distributed(two_shards_2rep,data_share,part,hiveHash(P_PARTKEY));
CREATE TABLE data_share.supplier on cluster two_shards_2rep
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/supplier','{replica}')
ORDER BY S_SUPPKEY;
CREATE TABLE data_share.supplier_all on cluster two_shards_2rep
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = Distributed(two_shards_2rep,data_share,supplier,hiveHash(S_SUPPKEY));
写入数据
$ clickhouse-client --query "INSERT INTO data_share.customer_all FORMAT CSV" < customer.tbl
$ clickhouse-client --query "INSERT INTO data_share.part FORMAT CSV" < part.tbl
$ clickhouse-client --query "INSERT INTO data_share.supplier FORMAT CSV" < supplier.tbl
$ clickhouse-client --query "INSERT INTO data_share.lineorder FORMAT CSV" < lineorder.tbl
建立宽表
SET max_memory_usage = 20000000000;
CREATE TABLE data_share.lineorder_flat on cluster two_shards_2rep
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder_flat','{replica}')
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM data_share.lineorder AS l
INNER JOIN data_share.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN data_share.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN data_share.part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
测试报告
写入测试
导入lineorder 表的6亿条数据耗时约20分钟;平均50w条/s;
查询:count(*)
┌───count()─┐
│ 600037902 │
└───────────┘
耗时: 0.005 sec.
分片1(ck1-ck2)
![]() | ![]() |
---|
分片2(ck3-ck4)
![]() | ![]() |
---|
建立宽表
SET max_memory_usage = 20000000000;
CREATE TABLE data_share.lineorder_flat on cluster two_shards_2rep
ENGINE = ReplicatedMergeTree('/clickhouse/table/{shard}/lineorder_flat','{replica}')
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM data_share.lineorder AS l
INNER JOIN data_share.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN data_share.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN data_share.part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
汇总数据到flat表,耗时3分钟;
此时的系统性能报告如下:
查询效率测试
# Count(*):
┌──count()─┐
│ 66543963 │
└──────────┘
耗时: 0.002 sec.
# 查询1:
SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
┌───────revenue─┐
│ 5579054405072 │
└───────────────┘
Elapsed: 0.051 sec. Processed 11.38 million rows, 91.04 MB (222.39 million rows/s., 1.78 GB/s.)
# 查询2:
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
┌─sum(LO_REVENUE)─┬─year─┬─P_BRAND───┐
│ 8732753082 │ 1992 │ MFGR#121 │
│ 8298059898 │ 1992 │ MFGR#1210 │
│ 8654840744 │ 1992 │ MFGR#1211 │
│ 8281452448 │ 1992 │ MFGR#1212 │
│ 7872747212 │ 1992 │ MFGR#1213 │
│ 8067499326 │ 1992 │ MFGR#1214 │
│ 8533006418 │ 1992 │ MFGR#1215 │
│ 8666214904 │ 1992 │ MFGR#1216 │
│ 8243585806 │ 1992 │ MFGR#1217 │
| ...... | .... | .... |
│ 4890044962 │ 1998 │ MFGR#125 │
│ 4198887088 │ 1998 │ MFGR#126 │
│ 5063931218 │ 1998 │ MFGR#127 │
│ 5162316544 │ 1998 │ MFGR#128 │
│ 4496495932 │ 1998 │ MFGR#129 │
└─────────────────┴──────┴───────────┘
280 rows in set. Elapsed: 2.536 sec. Processed 74.66 million rows, 773.05 MB (29.43 million rows/s., 304.77 MB/s.)
# 查询3:
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM lineorder_flat
WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
┌─C_CITY─────┬─S_CITY─────┬─year─┬───revenue─┐
│ UNITED KI1 │ UNITED KI1 │ 1992 │ 819803234 │
│ UNITED KI5 │ UNITED KI5 │ 1992 │ 777564246 │
│ UNITED KI5 │ UNITED KI1 │ 1992 │ 683287196 │
│ UNITED KI1 │ UNITED KI5 │ 1992 │ 586031246 │
│ UNITED KI1 │ UNITED KI1 │ 1993 │ 828457936 │
│ UNITED KI1 │ UNITED KI5 │ 1993 │ 743591938 │
│ UNITED KI5 │ UNITED KI5 │ 1993 │ 615734476 │
│ UNITED KI5 │ UNITED KI1 │ 1993 │ 579175942 │
│ UNITED KI1 │ UNITED KI5 │ 1994 │ 770470484 │
│ UNITED KI1 │ UNITED KI1 │ 1994 │ 712789452 │
│ UNITED KI5 │ UNITED KI1 │ 1994 │ 655200664 │
│ UNITED KI5 │ UNITED KI5 │ 1994 │ 583152106 │
│ UNITED KI1 │ UNITED KI1 │ 1995 │ 733116632 │
│ UNITED KI5 │ UNITED KI1 │ 1995 │ 723067936 │
│ UNITED KI5 │ UNITED KI5 │ 1995 │ 703419120 │
│ UNITED KI1 │ UNITED KI5 │ 1995 │ 594012920 │
│ UNITED KI1 │ UNITED KI5 │ 1996 │ 740211016 │
│ UNITED KI1 │ UNITED KI1 │ 1996 │ 653797052 │
│ UNITED KI5 │ UNITED KI1 │ 1996 │ 648050389 │
│ UNITED KI5 │ UNITED KI5 │ 1996 │ 594341338 │
│ UNITED KI1 │ UNITED KI1 │ 1997 │ 753622992 │
│ UNITED KI5 │ UNITED KI5 │ 1997 │ 730044290 │
│ UNITED KI5 │ UNITED KI1 │ 1997 │ 662865918 │
│ UNITED KI1 │ UNITED KI5 │ 1997 │ 649905990 │
└────────────┴────────────┴──────┴───────────┘
24 rows in set. Elapsed: 0.283 sec. Processed 67.99 million rows, 556.43 MB (240.42 million rows/s., 1.97 GB/s.)
# 查询4:
SELECT
toYear(LO_ORDERDATE) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
┌─year─┬─C_NATION──────┬───────profit─┐
│ 1992 │ ARGENTINA │ 129264034732 │
│ 1992 │ BRAZIL │ 129190275718 │
│ 1992 │ CANADA │ 127999498130 │
│ 1992 │ PERU │ 130008432234 │
│ 1992 │ UNITED STATES │ 128121512564 │
│ 1993 │ ARGENTINA │ 129045810754 │
│ 1993 │ BRAZIL │ 129272278766 │
│ 1993 │ CANADA │ 129201057096 │
│ 1993 │ PERU │ 127234258294 │
│ 1993 │ UNITED STATES │ 127625951768 │
│ 1994 │ ARGENTINA │ 128315941790 │
│ 1994 │ BRAZIL │ 129750923128 │
│ 1994 │ CANADA │ 128931983308 │
│ 1994 │ PERU │ 127569079104 │
│ 1994 │ UNITED STATES │ 128950639058 │
│ 1995 │ ARGENTINA │ 128653868430 │
│ 1995 │ BRAZIL │ 129147797904 │
│ 1995 │ CANADA │ 129487283214 │
│ 1995 │ PERU │ 127897351820 │
│ 1995 │ UNITED STATES │ 128060931298 │
│ 1996 │ ARGENTINA │ 125437970016 │
│ 1996 │ BRAZIL │ 124507623177 │
│ 1996 │ CANADA │ 124286938916 │
│ 1996 │ PERU │ 124366141740 │
│ 1996 │ UNITED STATES │ 123662006907 │
│ 1997 │ ARGENTINA │ 128124640660 │
│ 1997 │ BRAZIL │ 129806833966 │
│ 1997 │ CANADA │ 127734326768 │
│ 1997 │ PERU │ 129729837034 │
│ 1997 │ UNITED STATES │ 128388371012 │
│ 1998 │ ARGENTINA │ 75667522918 │
│ 1998 │ BRAZIL │ 76089525024 │
│ 1998 │ CANADA │ 75113316030 │
│ 1998 │ PERU │ 76322655284 │
│ 1998 │ UNITED STATES │ 75648021690 │
└──────┴───────────────┴──────────────┘
35 rows in set. Elapsed: 1.647 sec. Processed 74.66 million rows, 1.05 GB (45.33 million rows/s., 635.15 MB/s.)
好了,虽然这是比较粗略的测试,很多地方都不太严谨,但是从测试结果和系统的整体性能的使用情况来看,clickhouse真的很强!后续我在使用过程中要是有新的分享会再次的分享给大家的,感谢您的阅读。😃