9001 cm agent 端口号 9003
9010 zookeeper jmx 9011
info warning
#### 0.添加hosts
-----------------------
#### 1.创建目录
---------------
centos-1 创建目录:
mkdir -p /data1/clickhouse/{node1,node4}/{data,tmp,logs}
centos-2 创建目录:
mkdir -p /data1/clickhouse/{node2,node5}/{data,tmp,logs}
centos-3 创建目录:
mkdir -p /data1/clickhouse/{node3,node6}/{data,tmp,logs}
#### 2.在每个node创建config.xml
------------------------------
<?xml version="1.0"?>
<yandex>
    <!-- Logging -->
    <logger>
        <level>warning</level>
        <log>/data1/clickhouse/node3/logs/clickhouse.log</log>
        <errorlog>/data1/clickhouse/node3/logs/error.log</errorlog>
        <size>500M</size>
        <count>5</count>
    </logger>
    <!-- Local node ports -->
    <http_port>8123</http_port>
    <tcp_port>9003</tcp_port>
    <interserver_http_port>9009</interserver_http_port>
    <!-- Hostname or IP of this machine, used for replica-to-replica exchange -->
    <interserver_http_host>192.168.10.67</interserver_http_host>
    <!-- Local settings -->
    <listen_host>0.0.0.0</listen_host>
    <max_connections>2048</max_connections>
    <receive_timeout>800</receive_timeout>
    <send_timeout>800</send_timeout>
    <keep_alive_timeout>3</keep_alive_timeout>
    <!-- Maximum number of simultaneously processed queries -->
    <max_concurrent_queries>100</max_concurrent_queries>
    <uncompressed_cache_size>4294967296</uncompressed_cache_size>
    <mark_cache_size>5368709120</mark_cache_size>
    <path>/data1/clickhouse/node3/</path>
    <tmp_path>/data1/clickhouse/node3/tmp/</tmp_path>
    <users_config>/data1/clickhouse/node3/users.xml</users_config>
    <default_profile>default</default_profile>
    <query_log>
        <database>system</database>
        <table>query_log</table>
        <partition_by>toMonday(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>
    <query_thread_log>
        <database>system</database>
        <table>query_thread_log</table>
        <partition_by>toMonday(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_thread_log>
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>8001</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
    </prometheus>
    <default_database>default</default_database>
    <timezone>Asia/Shanghai</timezone>
    <!-- Cluster-related configuration, substituted from include_from (metrika.xml) -->
    <remote_servers incl="clickhouse_remote_servers" />
    <zookeeper incl="zookeeper-servers" optional="true" />
    <macros incl="macros" optional="true" />
    <!-- Built-in dictionaries and config can be reloaded on the fly without a server restart -->
    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
    <max_session_timeout>3600</max_session_timeout>
    <default_session_timeout>300</default_session_timeout>
    <!-- 0 disables the size safety check for DROP TABLE / DROP PARTITION -->
    <max_table_size_to_drop>0</max_table_size_to_drop>
    <merge_tree>
        <parts_to_delay_insert>300</parts_to_delay_insert>
        <parts_to_throw_insert>600</parts_to_throw_insert>
        <max_delay_to_insert>2</max_delay_to_insert>
    </merge_tree>
    <max_partition_size_to_drop>0</max_partition_size_to_drop>
    <distributed_ddl>
        <!-- Path in ZooKeeper to queue with DDL queries -->
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <include_from>/data1/clickhouse/node3/metrika.xml</include_from>
</yandex>
### 在每个node创建users.xml
---------------------------------------
<?xml version="1.0"?>
<yandex>
    <profiles>
        <default>
            <!-- Tune these limits to the machine's actual memory -->
            <!-- Maximum RAM for a single query on one server, in bytes -->
            <max_memory_usage>54975581388</max_memory_usage>
            <!-- Maximum RAM for all user queries on one server -->
            <max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>
            <!-- Maximum external-disk space a single GROUP BY may spill to -->
            <max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>
            <max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
            <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>
            <!-- Maximum number of CPU threads a single query may use -->
            <max_threads>8</max_threads>
            <log_queries>1</log_queries>
        </default>
        <readonly>
            <max_threads>8</max_threads>
            <max_memory_usage>54975581388</max_memory_usage>
            <max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>
            <max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>
            <max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
            <readonly>1</readonly>
            <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>
            <log_queries>1</log_queries>
        </readonly>
    </profiles>
    <!-- A quota limits a user's usage over a time window, not a single query -->
    <quotas>
        <!-- rule name -->
        <default>
            <!-- time window configuration -->
            <interval>
                <duration>3600</duration>
                <!-- total queries allowed per window, 0 = unlimited -->
                <queries>0</queries>
                <!-- total errors allowed per window, 0 = unlimited -->
                <errors>0</errors>
                <!-- total result rows allowed per window, 0 = unlimited -->
                <result_rows>0</result_rows>
                <!-- rows remote nodes may read for distributed queries per window, 0 = unlimited -->
                <read_rows>0</read_rows>
                <!-- total query execution time per window, in seconds, 0 = unlimited -->
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
    <users>
        <!-- user name -->
        <default>
            <!-- must stay consistent with the password declared in metrika.xml -->
            <password>123456</password>
            <networks>
                <!-- allow access from any IP -->
                <ip>::/0</ip>
            </networks>
            <!-- profile (role) assigned to this user -->
            <profile>default</profile>
            <!-- quota assigned to this user -->
            <quota>default</quota>
        </default>
        <ch_ro>
            <password>123456</password>
            <networks>
                <ip>::/0</ip>
            </networks>
            <profile>readonly</profile>
            <quota>default</quota>
        </ch_ro>
    </users>
</yandex>
### 在每个node创建metrika.xml
---------------------------------------
<?xml version="1.0"?>
<yandex>
    <!-- ClickHouse cluster topology: 3 shards x 2 replicas spread over 3 hosts -->
    <clickhouse_remote_servers>
        <!-- cluster name -->
        <ch_cluster_all>
            <!-- shard 1 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v04.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <!-- this password must match the one declared in users.xml -->
                    <password>123456</password>
                </replica>
                <!-- replica of shard 1 -->
                <replica>
                    <host>cdh-slave-v06.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <!-- shard 2 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v05.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <!-- replica of shard 2 -->
                <replica>
                    <host>cdh-slave-v04.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <!-- shard 3 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v06.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <!-- replica of shard 3 -->
                <replica>
                    <host>cdh-slave-v05.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
        </ch_cluster_all>
    </clickhouse_remote_servers>
    <!-- ZooKeeper ensemble used for replication coordination -->
    <zookeeper-servers>
        <node index="1">
            <host>cdh-slave-v01.julive.com</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>cdh-slave-v02.julive.com</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>cdh-slave-v03.julive.com</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>
    <!-- per-node macros: this copy is for node3 (layer 01, shard 03) -->
    <macros>
        <layer>01</layer>
        <!-- shard number -->
        <shard>03</shard>
        <!-- code name of this replica -->
        <replica>node3</replica>
    </macros>
    <!-- NOTE(review): this top-level <networks> is not referenced by any incl= in config.xml — confirm it is actually used -->
    <networks>
        <ip>::/0</ip>
    </networks>
    <!-- compression settings -->
    <clickhouse_compression>
        <case>
            <min_part_size>10000000000</min_part_size>
            <min_part_size_ratio>0.01</min_part_size_ratio>
            <!-- lz4 compresses faster than zstd but uses more disk -->
            <method>lz4</method>
        </case>
    </clickhouse_compression>
</yandex>
<!-- macros rules (per-node layer/shard assignments) -->
node1 :
layer/shard :01/01
node2:
layer/shard :01/02
node3:
layer/shard :01/03
node4:
layer/shard :01/02
node5:
layer/shard :01/03
node6:
layer/shard :01/01
#### 修改目录权限
--------------------
cd /data1 && chown -R clickhouse.clickhouse clickhouse
#### 创建进程守护
--------------------
# vim /etc/systemd/system/clickhouse_node1.service
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
After=network-online.target

[Service]
#Type=simple
Type=forking
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
# Fixed: config must point at node1's own directory (was /data1/clickhouse/ch_9000/config.xml,
# which does not match the node{1..6} layout created above); pid-file already used node1.
ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node1/config.xml --pid-file=/data1/clickhouse/node1/clickhouse-server.pid
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]
WantedBy=multi-user.target
##### 启动进程
----------------------
centos-1主机进行如下操作:
systemctl start clickhouse_node1.service
systemctl start clickhouse_node4.service
centos-2主机进行如下操作:
systemctl start clickhouse_node2.service
systemctl start clickhouse_node5.service
centos-3主机进行如下操作:
systemctl start clickhouse_node3.service
systemctl start clickhouse_node6.service
验证如下端口是否被监听:
netstat -anlp|grep 9003 (clickhouse tcp端口)
netstat -anlp|grep 9002 (clickhouse tcp端口)
netstat -anlp|grep 8123 (clickhouse http端口)
netstat -anlp|grep 8124 (clickhouse http端口)
netstat -anlp|grep 9009 (clickhouse 数据交互端口)
netstat -anlp|grep 9010 (clickhouse 数据交互端口)
##### 登录测试
--------------------
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 -m
#### 测试建表
create database testdb on cluster ch_cluster_all;
CREATE DATABASE testdb ON CLUSTER ch_cluster_all;
┌─host─────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ centos-3 │ 9000 │ 0 │ │ 5 │ 0 │
│ centos-2 │ 9000 │ 0 │ │ 4 │ 0 │
│ centos-1 │ 9002 │ 0 │ │ 3 │ 0 │
│ centos-3 │ 9002 │ 0 │ │ 2 │ 0 │
│ centos-1 │ 9000 │ 0 │ │ 1 │ 0 │
│ centos-2 │ 9002 │ 0 │ │ 0 │ 0 │
└──────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
%s/centos-1/cdh-slave-v04.julive.com/g
%s/centos-2/cdh-slave-v05.julive.com/g
%s/centos-3/cdh-slave-v06.julive.com/g
vim /etc/systemd/system/clickhouse_node6.service
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
After=network-online.target

[Service]
#Type=simple
Type=forking
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
# Fixed: pid-file pointed at node3 while the config is node6; each node must own its pid-file,
# otherwise two units on the same host clobber each other's pid.
ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node6/config.xml --pid-file=/data1/clickhouse/node6/clickhouse-server.pid
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]
WantedBy=multi-user.target
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"
测试表mergeTree
----------------------------
-- MergeTree test table: partitioned by day of create_time,
-- primary key (id), sort key (id, sku_id).
create table t_order_mt(
id UInt32,
sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =MergeTree
partition by toYYYYMMDD(create_time)primary key (id)order by (id,sku_id);
-- First batch. MergeTree does not deduplicate, so repeated rows are kept.
insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(102,'sku_002',2000.00,'2020-06-01 11:00:00'),
(102,'sku_004',2500.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 13:00:00'),
(102,'sku_002',12000.00,'2020-06-01 13:00:00'),
(102,'sku_002',600.00,'2020-06-02 12:00:00');
-- Second, identical batch: creates a second data part with duplicate rows.
insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(102,'sku_002',2000.00,'2020-06-01 11:00:00'),
(102,'sku_004',2500.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 13:00:00'),
(102,'sku_002',12000.00,'2020-06-01 13:00:00'),
(102,'sku_002',600.00,'2020-06-02 12:00:00');
-- Force an unscheduled merge so the parts from both inserts are combined.
optimize table t_order_mt final;
测试表ReplicatedMergeTree
----------------------------
-- ReplicatedMergeTree test table with a hard-coded ZooKeeper path and
-- replica name 'node1' (no macros; run the matching statement per node).
create table t_order_rep2(
id UInt32,sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =ReplicatedMergeTree('/clickhouse/table/01/t_order_rep','node1')
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id,sku_id);
-- Insert on one replica; rows replicate to the others via ZooKeeper.
insert into t_order_rep2 values
(101,'sku_001',1000.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 12:00:00'),
(103,'sku_004',2500.00,'2020-06-01 12:00:00'),
(104,'sku_002',2000.00,'2020-06-01 12:00:00'),
(105,'sku_003',600.00,'2020-06-02 12:00:00');
-- Replicated local table on every node of the cluster; the ZooKeeper path
-- uses the {layer}/{shard} macros from metrika.xml, replica name {replica}.
create table st_order_mt1 on cluster ch_cluster_all(
id UInt32,sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/st_order_mt','{replica}')
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id,sku_id);
-- Distributed table over st_order_mt1; hiveHash(sku_id) is the sharding key.
create table st_order_mt_all on cluster ch_cluster_all(
id UInt32,
sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine = Distributed(ch_cluster_all,testdb, st_order_mt1,hiveHash(sku_id));
-- Fixed: insert targeted st_order_mt_all2, a table never created above;
-- the Distributed table is named st_order_mt_all.
insert into st_order_mt_all values
(201,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(202,'sku_002',2000.00,'2020-06-01 12:00:00'),
(203,'sku_004',2500.00,'2020-06-01 12:00:00'),
(204,'sku_002',2000.00,'2020-06-01 12:00:00'),
(205,'sku_003',600.00,'2020-06-02 12:00:00');
Distributed(集群名称,库名,本地表名,分片键)分片键必须是整型数字,所以用hiveHash函数转换,也可以rand()