文章目录
前言
一、服务搭建
1、准备包
官网下载地址:https://clickhouse.com(旧地址 https://clickhouse.yandex 已跳转到新官网)
clickhouse-client-20.8.3.18-1.el7.x86_64.rpm
clickhouse-common-static-20.8.3.18-1.el7.x86_64.rpm
clickhouse-server-20.8.3.18-1.el7.x86_64.rpm
clickhouse-server-common-20.8.3.18-1.el7.x86_64.rpm
2、在两台服务器上分别搭建clickhouse服务
# Install all downloaded rpm packages (client, server, common) at once.
rpm -ivh *.rpm
# Start as a system service using the default config file (/etc/clickhouse-server/config.xml).
service clickhouse-server start
# Alternatively, run in the foreground with an explicitly specified config file (useful for debugging).
clickhouse-server --config-file=/etc/clickhouse-server/config.xml
# Restart the service (e.g. after config changes).
service clickhouse-server restart
二、配置调整
1、config.xml
<listen_host>::</listen_host>
<!-- <listen_host>::1</listen_host> -->
<!-- <listen_host>127.0.0.1</listen_host> -->
#副本配置的端口一样
<interserver_http_port>9009</interserver_http_port>
#对应副本服务器的ip配置,如果未配置可能造成数据副本只能单向复制数据
<interserver_http_host>xxx.xx.xx.31</interserver_http_host>
#引用准备新建的配置文件metrika.xml
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
2、metrika.xml
服务器31上的配置
<yandex>
<!-- Cluster topology, referenced from config.xml via <include_from>. -->
<clickhouse_remote_servers>
<!-- ck_cluster is a user-defined cluster name; it is what ON CLUSTER DDL refers to. -->
<ck_cluster>
<shard>
<!-- true: a write goes to one replica and ReplicatedMergeTree/ZooKeeper propagates it,
     instead of the Distributed engine writing to every replica itself. -->
<internal_replication>true</internal_replication>
<weight>1</weight>
<!-- Single shard with two replicas: hosts .31 and .37 replicate each other. -->
<replica>
<host>xxx.xx.xx.37</host>
<port>9000</port>
<user>manage</user>
<!-- NOTE(review): plaintext password in config; prefer password_sha256_hex and
     restrictive file permissions on this file. -->
<password>jRCCCG4I1Be9VOEv</password>
</replica>
<replica>
<host>xxx.xx.xx.31</host>
<port>9000</port>
<user>manage</user>
<password>jRCCCG4I1Be9VOEv</password>
</replica>
</shard>
</ck_cluster>
</clickhouse_remote_servers>
<!-- ZooKeeper ensemble (3 nodes) used to coordinate ReplicatedMergeTree replication. -->
<zookeeper>
<node index="1">
<host>xxx.xx.xx.32</host>
<port>2181</port>
</node>
<node index="2">
<host>xxx.xx.xx.31</host>
<port>2181</port>
</node>
<node index="3">
<host>xxx.xx.xx.33</host>
<port>2181</port>
</node>
</zookeeper>
<!-- Per-host macro values substituted into {shard}/{replica} in table DDL.
     The replica value must be unique per host; here the host's own IP is used. -->
<macros>
<shard>1</shard>
<replica>xxx.xx.xx.31</replica>
</macros>
<!-- Compression settings for MergeTree data parts. -->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<!-- lz4 is faster than zstd but has a lower compression ratio,
     so it uses more disk space. -->
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
服务器37上的配置
<yandex>
<!-- Cluster topology for server .37 — mirror of server .31's metrika.xml;
     only the <macros><replica> value differs (it must name this host). -->
<clickhouse_remote_servers>
<!-- ck_cluster is a user-defined cluster name; it is what ON CLUSTER DDL refers to. -->
<ck_cluster>
<shard>
<!-- true: rely on ReplicatedMergeTree/ZooKeeper to propagate writes between replicas. -->
<internal_replication>true</internal_replication>
<weight>1</weight>
<!-- Single shard with two replicas: hosts .31 and .37 replicate each other. -->
<replica>
<host>xxx.xx.xx.31</host>
<port>9000</port>
<user>manage</user>
<!-- NOTE(review): plaintext password in config; prefer password_sha256_hex. -->
<password>jRCCCG4I1Be9VOEv</password>
</replica>
<replica>
<host>xxx.xx.xx.37</host>
<port>9000</port>
<user>manage</user>
<password>jRCCCG4I1Be9VOEv</password>
</replica>
</shard>
</ck_cluster>
</clickhouse_remote_servers>
<!-- ZooKeeper ensemble (3 nodes) used to coordinate ReplicatedMergeTree replication. -->
<zookeeper>
<node index="1">
<host>xxx.xx.xx.32</host>
<port>2181</port>
</node>
<node index="2">
<host>xxx.xx.xx.31</host>
<port>2181</port>
</node>
<node index="3">
<host>xxx.xx.xx.33</host>
<port>2181</port>
</node>
</zookeeper>
<!-- {shard}/{replica} macro values for this host; replica is this host's IP. -->
<macros>
<shard>1</shard>
<replica>xxx.xx.xx.37</replica>
</macros>
<!-- Compression settings for MergeTree data parts. -->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<!-- lz4 is faster than zstd but has a lower compression ratio,
     so it uses more disk space. -->
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
3、users.xml
<?xml version="1.0"?>
<yandex>
<profiles>
<!-- Settings profile applied to users that reference profile "default". -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
<max_partitions_per_insert_block>5000</max_partitions_per_insert_block>
<!-- Raised from the default: with a too-small value, large batched CASE WHEN
     updates can silently do nothing without raising an error. -->
<max_query_size>524286</max_query_size>
</default>
<readonly>
<readonly>0</readonly>
</readonly>
</profiles>
<users>
<manage>
<!-- NOTE(review): plaintext password; prefer password_sha256_hex. -->
<password>jRCCCG4I1Be9VOEv</password>
<!-- Fix: removed a stray bare <ip> element that sat directly under the user;
     <ip> is only valid inside <networks>. ::/0 allows connections from any
     address — tighten for production. -->
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<!-- Allows this user to manage users and grants via SQL. -->
<access_management>1</access_management>
</manage>
<business>
<password>jRCCCG4I1Be9VOEv</password>
<!-- Fix: stray bare <ip> element removed here as well. -->
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</business>
</users>
<!-- Quota "default": all limits set to 0 = unlimited, tracked per 1h interval. -->
<quotas>
<default>
<interval>
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</yandex>
三、试用
1、建表
-- The database must exist on every node beforehand (CREATE DATABASE is not run here),
-- but the table only needs to be created once: ON CLUSTER executes the DDL on all nodes.
CREATE TABLE test.dim_marketing_activities ON CLUSTER ck_cluster
(
    `itemid` String COMMENT '优惠id',
    `itemname` String COMMENT '优惠名称'
)
-- Include the database name in the ZooKeeper path so that identically-named tables
-- in different databases do not collide on the same replication path.
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/test/dim_marketing_activities', '{replica}')
ORDER BY itemname
SETTINGS index_granularity = 8192;
2、插入数据
-- Fixes vs. the original: target the database the table was actually created in
-- (test, not cq_report_db); use an explicit column list instead of (*); quote the
-- ids, since itemid is declared String. Writing to either replica is enough — the
-- other replica syncs automatically via ReplicatedMergeTree.
INSERT INTO test.dim_marketing_activities (itemid, itemname) VALUES
('1', '5G爱家模组(智能组网版)'),
('2', '5G叠加包家庭版(全家wifi30元档)'),
('3', '5G叠加包家庭版(全千兆30元档)'),
('4', '5G叠加包家庭版(全千兆70元档)'),
('5', '全家WiFi惠享包-158元档'),
('6', '全家WiFi惠享包-198元档'),
('7', '全家WiFi惠享包-198元档(优惠版)'),
('8', '全家WiFi惠享包2-198元档(优惠版)'),
('9', '全家WiFi惠享包3-198元档(优惠版)'),
('10', '全家WiFi惠享包5折购'),
('11', '全家WiFi惠享月包-10元档'),
('12', '全家WiFi惠享月包5折购'),
('13', '全家WiFi惠享月包-8元档'),
('14', '全家WiFi提速月包300M');
两个副本中任何一个副本插入数据,另外一个副本会同步生效
总结
1、自己搭建完成后,遇到了副本只能单向复制的情况,通过配置interserver_http_host解决
2、测试过程中,发现批量更新数量达到一定数量时,更新无反应,也不抛错,通过调大max_query_size解决
3、clickhouse支持配置热加载,修改配置后会自动生效,但需要等待一段时间;如果用kill进程再手动启动的方式重启,clickhouse启动会特别慢,用service clickhouse-server restart的方式重启更快更稳妥。