ClickHouse Distributed Deployment

Notes on ports and logging:

9001 is the CM agent port, so 9003 is used for the ClickHouse TCP port instead.

9010 is the ZooKeeper JMX port; 9011 is the alternative.

Log level: warning (not info).



 

#### 0. Add hosts entries

-----------------------
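
Every host must be able to resolve all six cluster hostnames. A minimal sketch of the /etc/hosts entries to append on each machine; the IP-to-host mapping below is an assumption (only 192.168.10.67, node3's interserver_http_host, and 192.168.10.65, used in the login test later, appear in this document), so replace the addresses with your real ones:

cat >> /etc/hosts <<'EOF'
192.168.10.61  cdh-slave-v01.julive.com
192.168.10.62  cdh-slave-v02.julive.com
192.168.10.63  cdh-slave-v03.julive.com
192.168.10.65  cdh-slave-v04.julive.com
192.168.10.66  cdh-slave-v05.julive.com
192.168.10.67  cdh-slave-v06.julive.com
EOF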






 

#### 1. Create directories

---------------

Create the directories on centos-1:

mkdir -p /data1/clickhouse/{node1,node4}/{data,tmp,logs}

Create the directories on centos-2:

mkdir -p /data1/clickhouse/{node2,node5}/{data,tmp,logs}

Create the directories on centos-3:

mkdir -p /data1/clickhouse/{node3,node6}/{data,tmp,logs}
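
A quick sanity check on each host that the tree was created (a sketch; the node numbers differ per host):

ls -ld /data1/clickhouse/node*/{data,tmp,logs}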







 

#### 2. Create config.xml on each node

------------------------------

<?xml version="1.0"?>

<yandex>

<!-- Logging -->

<logger>

<level>warning</level>

<log>/data1/clickhouse/node3/logs/clickhouse.log</log>

<errorlog>/data1/clickhouse/node3/logs/error.log</errorlog>

<size>500M</size>

<count>5</count>

</logger>

<!-- Local node info -->

<http_port>8123</http_port>

<tcp_port>9003</tcp_port>

<interserver_http_port>9009</interserver_http_port>

<interserver_http_host>192.168.10.67</interserver_http_host>

<!-- Hostname or IP of this machine -->

<!-- Local settings -->

<listen_host>0.0.0.0</listen_host>

<max_connections>2048</max_connections>

<receive_timeout>800</receive_timeout>

<send_timeout>800</send_timeout>

<keep_alive_timeout>3</keep_alive_timeout>

<!-- Maximum number of concurrently processed queries -->

<max_concurrent_queries>100</max_concurrent_queries>

<uncompressed_cache_size>4294967296</uncompressed_cache_size>

<mark_cache_size>5368709120</mark_cache_size>

<path>/data1/clickhouse/node3/</path>

<tmp_path>/data1/clickhouse/node3/tmp/</tmp_path>

<users_config>/data1/clickhouse/node3/users.xml</users_config>

<default_profile>default</default_profile>

<query_log>

<database>system</database>

<table>query_log</table>

<partition_by>toMonday(event_date)</partition_by>

<flush_interval_milliseconds>7500</flush_interval_milliseconds>

</query_log>

<query_thread_log>

<database>system</database>

<table>query_thread_log</table>

<partition_by>toMonday(event_date)</partition_by>

<flush_interval_milliseconds>7500</flush_interval_milliseconds>

</query_thread_log>

<prometheus>

<endpoint>/metrics</endpoint>

<port>8001</port>

<metrics>true</metrics>

<events>true</events>

<asynchronous_metrics>true</asynchronous_metrics>

</prometheus>

<default_database>default</default_database>

<timezone>Asia/Shanghai</timezone>

<!-- Cluster-related configuration -->

<remote_servers incl="clickhouse_remote_servers" />

<zookeeper incl="zookeeper-servers" optional="true" />

<macros incl="macros" optional="true" />

<!-- Built-in dictionaries can be reloaded on the fly without restarting the server -->

<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>

<max_session_timeout>3600</max_session_timeout>

<default_session_timeout>300</default_session_timeout>

<max_table_size_to_drop>0</max_table_size_to_drop>

<merge_tree>

<parts_to_delay_insert>300</parts_to_delay_insert>

<parts_to_throw_insert>600</parts_to_throw_insert>

<max_delay_to_insert>2</max_delay_to_insert>

</merge_tree>


<max_partition_size_to_drop>0</max_partition_size_to_drop>

<distributed_ddl>

<!-- Path in ZooKeeper to queue with DDL queries -->

<path>/clickhouse/task_queue/ddl</path>

</distributed_ddl>

<include_from>/data1/clickhouse/node3/metrika.xml</include_from>

</yandex>
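
The config.xml above is node3's. Each node needs its own copy with the node-specific values changed: the paths (node1 … node6), tcp_port (9003 for the first instance on a host, 9002 for the second), http_port (8123 / 8124), interserver_http_port (9009 / 9010) and interserver_http_host (the host's own IP). A sketch of deriving node6's config from node3's on the same host; the 8124/9010 values are taken from the port checks later in this document, and the 8002 prometheus port is an assumption to avoid a conflict between the two instances:

cp /data1/clickhouse/node3/config.xml /data1/clickhouse/node6/config.xml
sed -i \
  -e 's#/node3/#/node6/#g' \
  -e 's#<tcp_port>9003#<tcp_port>9002#' \
  -e 's#<http_port>8123#<http_port>8124#' \
  -e 's#<interserver_http_port>9009#<interserver_http_port>9010#' \
  -e 's#<port>8001#<port>8002#' \
  /data1/clickhouse/node6/config.xml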



 

### Create users.xml on each node

---------------------------------------

<?xml version="1.0"?>

<yandex>

<profiles>

<default>

<!-- Adjust to the actual memory of your machine -->

<!-- Maximum RAM (in bytes) a single query may use on this server -->

<max_memory_usage>54975581388</max_memory_usage>

<!-- Maximum RAM for all queries running on this server -->

<max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>

<!-- Memory threshold after which a GROUP BY spills to external (on-disk) aggregation -->

<max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>

<max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>

<use_uncompressed_cache>0</use_uncompressed_cache>

<load_balancing>random</load_balancing>

<distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>

<!-- Maximum number of threads a single query may use -->

<max_threads>8</max_threads>

<log_queries>1</log_queries>

</default>

<readonly>

<max_threads>8</max_threads>

<max_memory_usage>54975581388</max_memory_usage>

<max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>

<max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>

<max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>

<use_uncompressed_cache>0</use_uncompressed_cache>

<load_balancing>random</load_balancing>

<readonly>1</readonly>

<distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>

<log_queries>1</log_queries>

</readonly>

</profiles>

<!-- Quotas limit a user's resource usage over a time window, not a single query -->

<quotas>

<!-- rule name -->

<default>

<!-- Time window for the quota -->

<interval>

<duration>3600</duration>

<!-- Total number of queries allowed within the window; 0 means unlimited -->

<queries>0</queries>

<!-- Total number of errors allowed within the window; 0 means unlimited -->

<errors>0</errors>

<!-- Number of result rows allowed within the window; 0 means unlimited -->

<result_rows>0</result_rows>

<!-- Number of rows read from remote nodes in distributed queries within the window; 0 means unlimited -->

<read_rows>0</read_rows>

<!-- Total query execution time allowed within the window, in seconds; 0 means unlimited -->

<execution_time>0</execution_time>

</interval>

</default>

</quotas>

<users>

<!-- User name -->

<default>

<!-- Must match the password declared in metrika.xml -->

<password>123456</password>

<networks>

<!-- Allow access from any IP -->

<ip>::/0</ip>

</networks>

<!-- Profile assigned to this user -->

<profile>default</profile>

<!-- Quota assigned to this user -->

<quota>default</quota>

</default>

<ch_ro>

<password>123456</password>

<networks>

<ip>::/0</ip>

</networks>

<profile>readonly</profile>

<quota>default</quota>

</ch_ro>

</users>

</yandex>
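
The passwords above are stored in plain text. As a hardening option not used in the rest of this document, ClickHouse also accepts a SHA-256 hash via password_sha256_hex; a sketch of generating the value:

# generate the hash of the password 123456
echo -n '123456' | sha256sum | awk '{print $1}'
# put the resulting hex string into users.xml as
#   <password_sha256_hex>HEX</password_sha256_hex>
# in place of <password>123456</password>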



 

### Create metrika.xml on each node

---------------------------------------

<?xml version="1.0"?>

<yandex>

<!-- ClickHouse cluster nodes -->

<clickhouse_remote_servers>

<!-- Cluster name -->

<ch_cluster_all>

<!-- Shard 1 -->

<shard>

<internal_replication>true</internal_replication>

<replica>

<host>cdh-slave-v04.julive.com</host>

<port>9003</port>

<user>default</user>

<!-- This password must match the one defined in users.xml -->

<password>123456</password>

</replica>

<!-- Replica of shard 1 -->

<replica>

<host>cdh-slave-v06.julive.com</host>

<port>9002</port>

<user>default</user>

<password>123456</password>

</replica>

</shard>

<!-- Shard 2 -->

<shard>

<internal_replication>true</internal_replication>

<replica>

<host>cdh-slave-v05.julive.com</host>

<port>9003</port>

<user>default</user>

<password>123456</password>

</replica>

<!-- Replica of shard 2 -->

<replica>

<host>cdh-slave-v04.julive.com</host>

<port>9002</port>

<user>default</user>

<password>123456</password>

</replica>

</shard>

<!-- Shard 3 -->

<shard>

<internal_replication>true</internal_replication>

<replica>

<host>cdh-slave-v06.julive.com</host>

<port>9003</port>

<user>default</user>

<password>123456</password>

</replica>

<!-- Replica of shard 3 -->

<replica>

<host>cdh-slave-v05.julive.com</host>

<port>9002</port>

<user>default</user>

<password>123456</password>

</replica>

</shard>

</ch_cluster_all>

</clickhouse_remote_servers>

<!-- ZooKeeper configuration -->

<zookeeper-servers>

<node index="1">

<host>cdh-slave-v01.julive.com</host>

<port>2181</port>

</node>

<node index="2">

<host>cdh-slave-v02.julive.com</host>

<port>2181</port>

</node>

<node index="3">

<host>cdh-slave-v03.julive.com</host>

<port>2181</port>

</node>

</zookeeper-servers>

<macros>

<layer>01</layer>

<shard>03</shard>

<!-- Shard number -->

<replica>node3</replica>

<!-- Identifier of the current node -->

</macros>

<networks>

<ip>::/0</ip>

</networks>

<!-- Compression configuration -->

<clickhouse_compression>

<case>

<min_part_size>10000000000</min_part_size>

<min_part_size_ratio>0.01</min_part_size_ratio>

<method>lz4</method>

<!-- lz4 compresses faster than zstd but takes more disk space -->

</case>

</clickhouse_compression>

</yandex>


 

macros values per node (the <replica> macro is each node's own name, e.g. node3 in the metrika.xml above):

node1: layer/shard = 01/01

node2: layer/shard = 01/02

node3: layer/shard = 01/03

node4: layer/shard = 01/02

node5: layer/shard = 01/03

node6: layer/shard = 01/01




 

#### Fix directory ownership

--------------------

cd /data1 && chown -R clickhouse.clickhouse clickhouse


 

#### Create a systemd unit for each instance

--------------------

# vim /etc/systemd/system/clickhouse_node1.service

[Unit]

Description=ClickHouse Server (analytic DBMS for big data)

Requires=network-online.target

After=network-online.target

[Service]

#Type=simple

Type=forking

User=clickhouse

Group=clickhouse

Restart=always

RestartSec=30

RuntimeDirectory=clickhouse-server

ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node1/config.xml --pid-file=/data1/clickhouse/node1/clickhouse-server.pid

LimitCORE=infinity

LimitNOFILE=500000

CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]

WantedBy=multi-user.target
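
After writing a unit file (one per nodeN instance on its host), reload systemd and enable the service so it starts on boot; a typical sequence:

systemctl daemon-reload
systemctl enable clickhouse_node1.service    # repeat for node2..node6 on their respective hosts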



 

##### Start the services

----------------------

On centos-1:

systemctl start clickhouse_node1.service

systemctl start clickhouse_node4.service

On centos-2:

systemctl start clickhouse_node2.service

systemctl start clickhouse_node5.service

On centos-3:

systemctl start clickhouse_node3.service

systemctl start clickhouse_node6.service

Verify that the following ports are listening:

netstat -anlp | grep 9003 (ClickHouse TCP port)

netstat -anlp | grep 9002 (ClickHouse TCP port)

netstat -anlp | grep 8123 (ClickHouse HTTP port)

netstat -anlp | grep 8124 (ClickHouse HTTP port)

netstat -anlp | grep 9009 (ClickHouse interserver port)

netstat -anlp | grep 9010 (ClickHouse interserver port)




 

##### Login test

--------------------

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 -m
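
Once all six instances are up, the cluster defined in metrika.xml can be inspected from any node; a sketch using the same client invocation as above:

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="select cluster, shard_num, replica_num, host_name, port from system.clusters where cluster = 'ch_cluster_all'"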




 

#### Test: create a database on the cluster

create database testdb on cluster ch_cluster_all;

CREATE DATABASE testdb ON CLUSTER ch_cluster_all

┌─host─────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐

│ centos-3 │ 9000 │ 0 │ │ 5 │ 0 │

│ centos-2 │ 9000 │ 0 │ │ 4 │ 0 │

│ centos-1 │ 9002 │ 0 │ │ 3 │ 0 │

│ centos-3 │ 9002 │ 0 │ │ 2 │ 0 │

│ centos-1 │ 9000 │ 0 │ │ 1 │ 0 │

│ centos-2 │ 9002 │ 0 │ │ 0 │ 0 │

└──────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘







 

The centos-N names in the sample output above map to the real hosts; the vim substitutions used were:

%s/centos-1/cdh-slave-v04.julive.com/g


 

%s/centos-2/cdh-slave-v05.julive.com/g


 

%s/centos-3/cdh-slave-v06.julive.com/g




 

vim /etc/systemd/system/clickhouse_node6.service

[Unit]

Description=ClickHouse Server (analytic DBMS for big data)

Requires=network-online.target

After=network-online.target

[Service]

#Type=simple

Type=forking

User=clickhouse

Group=clickhouse

Restart=always

RestartSec=30

RuntimeDirectory=clickhouse-server

ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node6/config.xml --pid-file=/data1/clickhouse/node6/clickhouse-server.pid

LimitCORE=infinity

LimitNOFILE=500000

CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]

WantedBy=multi-user.target



 

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"




 

Test table: MergeTree

----------------------------

create table t_order_mt(

id UInt32,

sku_id String,

total_amount Decimal(16,2),

create_time Datetime)

engine = MergeTree

partition by toYYYYMMDD(create_time)

primary key (id)

order by (id,sku_id);


 

insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,

(102,'sku_002',2000.00,'2020-06-01 11:00:00'),

(102,'sku_004',2500.00,'2020-06-01 12:00:00'),

(102,'sku_002',2000.00,'2020-06-01 13:00:00'),

(102,'sku_002',12000.00,'2020-06-01 13:00:00'),

(102,'sku_002',600.00,'2020-06-02 12:00:00');


 

insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,

(102,'sku_002',2000.00,'2020-06-01 11:00:00'),

(102,'sku_004',2500.00,'2020-06-01 12:00:00'),

(102,'sku_002',2000.00,'2020-06-01 13:00:00'),

(102,'sku_002',12000.00,'2020-06-01 13:00:00'),

(102,'sku_002',600.00,'2020-06-02 12:00:00');


 

optimize table t_order_mt final;
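
MergeTree itself does not deduplicate, so the two identical INSERTs above leave duplicate rows in separate parts; OPTIMIZE ... FINAL forces the parts of each partition to merge. A sketch for inspecting the parts before and after the merge (assumes t_order_mt lives in the default database):

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="select partition, name, rows, active from system.parts where table = 't_order_mt'"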







 

Test table: ReplicatedMergeTree

----------------------------

create table t_order_rep2(

id UInt32,sku_id String,

total_amount Decimal(16,2),

create_time Datetime)

engine =ReplicatedMergeTree('/clickhouse/table/01/t_order_rep','node1')

partition by toYYYYMMDD(create_time)

primary key (id)

order by (id,sku_id);




 

insert into t_order_rep2 values

(101,'sku_001',1000.00,'2020-06-01 12:00:00'),

(102,'sku_002',2000.00,'2020-06-01 12:00:00'),

(103,'sku_004',2500.00,'2020-06-01 12:00:00'),

(104,'sku_002',2000.00,'2020-06-01 12:00:00'),

(105,'sku_003',600.00,'2020-06-02 12:00:00');
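
Replication status for the table can be checked through system.replicas; a sketch, run on the node where t_order_rep2 was created:

clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="select database, table, replica_name, total_replicas, active_replicas from system.replicas where table = 't_order_rep2'"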





 

create table st_order_mt1 on cluster ch_cluster_all(

id UInt32,sku_id String,

total_amount Decimal(16,2),

create_time Datetime)

engine =ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/st_order_mt','{replica}')

partition by toYYYYMMDD(create_time)

primary key (id)

order by (id,sku_id);




 

create table st_order_mt_all on cluster ch_cluster_all(

id UInt32,

sku_id String,

total_amount Decimal(16,2),

create_time Datetime)

engine = Distributed(ch_cluster_all,testdb, st_order_mt1,hiveHash(sku_id));



 

insert into st_order_mt_all values

(201,'sku_001',1000.00,'2020-06-01 12:00:00') ,

(202,'sku_002',2000.00,'2020-06-01 12:00:00'),

(203,'sku_004',2500.00,'2020-06-01 12:00:00'),

(204,'sku_002',2000.00,'2020-06-01 12:00:00'),

(205,'sku_003',600.00,'2020-06-02 12:00:00');


 

Distributed(cluster_name, database, local_table, sharding_key). The sharding key must be an integer, which is why hiveHash() is applied to the string column sku_id; rand() could be used instead.
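
To confirm the rows were spread across the shards, compare counts through the Distributed table and through the local table on a single node; a sketch:

# total rows visible through the Distributed table (all shards)
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="select count() from testdb.st_order_mt_all"

# rows stored in the local shard table on this particular instance
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="select count() from testdb.st_order_mt1"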
