9001 cm agent 端口号 9003
9010 zookeeper jmx 9011
info warning
#### 0.添加hosts
-----------------------
#### 1.创建目录
---------------
centos-1 创建目录:
mkdir -p /data1/clickhouse/{node1,node4}/{data,tmp,logs}
centos-2 创建目录:
mkdir -p /data1/clickhouse/{node2,node5}/{data,tmp,logs}
centos-3 创建目录:
mkdir -p /data1/clickhouse/{node3,node6}/{data,tmp,logs}
#### 2.在每个node创建config.xml
------------------------------
<?xml version="1.0"?>
<yandex>
    <!-- Logging -->
    <logger>
        <level>warning</level>
        <log>/data1/clickhouse/node3/logs/clickhouse.log</log>
        <errorlog>/data1/clickhouse/node3/logs/error.log</errorlog>
        <size>500M</size>
        <count>5</count>
    </logger>
    <!-- Local node ports -->
    <http_port>8123</http_port>
    <tcp_port>9003</tcp_port>
    <interserver_http_port>9009</interserver_http_port>
    <!-- Hostname or IP of this machine, used for replica-to-replica exchange -->
    <interserver_http_host>192.168.10.67</interserver_http_host>
    <!-- Local settings -->
    <listen_host>0.0.0.0</listen_host>
    <max_connections>2048</max_connections>
    <receive_timeout>800</receive_timeout>
    <send_timeout>800</send_timeout>
    <keep_alive_timeout>3</keep_alive_timeout>
    <!-- Maximum number of simultaneously processed queries -->
    <max_concurrent_queries>100</max_concurrent_queries>
    <uncompressed_cache_size>4294967296</uncompressed_cache_size>
    <mark_cache_size>5368709120</mark_cache_size>
    <path>/data1/clickhouse/node3/</path>
    <tmp_path>/data1/clickhouse/node3/tmp/</tmp_path>
    <users_config>/data1/clickhouse/node3/users.xml</users_config>
    <default_profile>default</default_profile>
    <query_log>
        <database>system</database>
        <table>query_log</table>
        <partition_by>toMonday(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>
    <query_thread_log>
        <database>system</database>
        <table>query_thread_log</table>
        <partition_by>toMonday(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_thread_log>
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>8001</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
    </prometheus>
    <default_database>default</default_database>
    <timezone>Asia/Shanghai</timezone>
    <!-- Cluster-related configuration, substituted from include_from (metrika.xml) -->
    <remote_servers incl="clickhouse_remote_servers" />
    <zookeeper incl="zookeeper-servers" optional="true" />
    <macros incl="macros" optional="true" />
    <!-- Built-in dictionaries and config can be reloaded on the fly without a server restart -->
    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
    <max_session_timeout>3600</max_session_timeout>
    <default_session_timeout>300</default_session_timeout>
    <!-- 0 disables the size safety check for DROP TABLE / DROP PARTITION -->
    <max_table_size_to_drop>0</max_table_size_to_drop>
    <merge_tree>
        <parts_to_delay_insert>300</parts_to_delay_insert>
        <parts_to_throw_insert>600</parts_to_throw_insert>
        <max_delay_to_insert>2</max_delay_to_insert>
    </merge_tree>
    <max_partition_size_to_drop>0</max_partition_size_to_drop>
    <distributed_ddl>
        <!-- Path in ZooKeeper to queue with DDL queries -->
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <include_from>/data1/clickhouse/node3/metrika.xml</include_from>
</yandex>
### 在每个node创建users.xml
---------------------------------------
<?xml version="1.0"?>
<yandex>
    <profiles>
        <default>
            <!-- Tune these limits to the machine's actual memory -->
            <!-- Maximum RAM for a single query on one server, in bytes -->
            <max_memory_usage>54975581388</max_memory_usage>
            <!-- Maximum RAM for all user queries on one server -->
            <max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>
            <!-- Maximum external-disk space a single GROUP BY may spill to -->
            <max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>
            <max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
            <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>
            <!-- Maximum number of CPU threads a single query may use -->
            <max_threads>8</max_threads>
            <log_queries>1</log_queries>
        </default>
        <readonly>
            <max_threads>8</max_threads>
            <max_memory_usage>54975581388</max_memory_usage>
            <max_memory_usage_for_all_queries>61847529062</max_memory_usage_for_all_queries>
            <max_bytes_before_external_group_by>21474836480</max_bytes_before_external_group_by>
            <max_bytes_before_external_sort>21474836480</max_bytes_before_external_sort>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
            <readonly>1</readonly>
            <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>
            <log_queries>1</log_queries>
        </readonly>
    </profiles>
    <!-- A quota limits a user's usage over a time window, not a single query -->
    <quotas>
        <!-- rule name -->
        <default>
            <!-- time window configuration -->
            <interval>
                <duration>3600</duration>
                <!-- total queries allowed per window, 0 = unlimited -->
                <queries>0</queries>
                <!-- total errors allowed per window, 0 = unlimited -->
                <errors>0</errors>
                <!-- total result rows allowed per window, 0 = unlimited -->
                <result_rows>0</result_rows>
                <!-- rows remote nodes may read for distributed queries per window, 0 = unlimited -->
                <read_rows>0</read_rows>
                <!-- total query execution time per window, in seconds, 0 = unlimited -->
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
    <users>
        <!-- user name -->
        <default>
            <!-- must stay consistent with the password declared in metrika.xml -->
            <password>123456</password>
            <networks>
                <!-- allow access from any IP -->
                <ip>::/0</ip>
            </networks>
            <!-- profile (role) assigned to this user -->
            <profile>default</profile>
            <!-- quota assigned to this user -->
            <quota>default</quota>
        </default>
        <ch_ro>
            <password>123456</password>
            <networks>
                <ip>::/0</ip>
            </networks>
            <profile>readonly</profile>
            <quota>default</quota>
        </ch_ro>
    </users>
</yandex>
### 在每个node创建metrika.xml
---------------------------------------
<?xml version="1.0"?>
<yandex>
    <!-- ClickHouse cluster topology: 3 shards x 2 replicas spread over 3 hosts -->
    <clickhouse_remote_servers>
        <!-- cluster name -->
        <ch_cluster_all>
            <!-- shard 1 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v04.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <!-- this password must match the one declared in users.xml -->
                    <password>123456</password>
                </replica>
                <!-- replica of shard 1 -->
                <replica>
                    <host>cdh-slave-v06.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <!-- shard 2 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v05.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <!-- replica of shard 2 -->
                <replica>
                    <host>cdh-slave-v04.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <!-- shard 3 -->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>cdh-slave-v06.julive.com</host>
                    <port>9003</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <!-- replica of shard 3 -->
                <replica>
                    <host>cdh-slave-v05.julive.com</host>
                    <port>9002</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
        </ch_cluster_all>
    </clickhouse_remote_servers>
    <!-- ZooKeeper ensemble used for replication coordination -->
    <zookeeper-servers>
        <node index="1">
            <host>cdh-slave-v01.julive.com</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>cdh-slave-v02.julive.com</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>cdh-slave-v03.julive.com</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>
    <!-- per-node macros: this copy is for node3 (layer 01, shard 03) -->
    <macros>
        <layer>01</layer>
        <!-- shard number -->
        <shard>03</shard>
        <!-- code name of this replica -->
        <replica>node3</replica>
    </macros>
    <!-- NOTE(review): this top-level <networks> is not referenced by any incl= in config.xml — confirm it is actually used -->
    <networks>
        <ip>::/0</ip>
    </networks>
    <!-- compression settings -->
    <clickhouse_compression>
        <case>
            <min_part_size>10000000000</min_part_size>
            <min_part_size_ratio>0.01</min_part_size_ratio>
            <!-- lz4 compresses faster than zstd but uses more disk -->
            <method>lz4</method>
        </case>
    </clickhouse_compression>
</yandex>
<!-- macros rules (per-node layer/shard assignments) -->
node1 :
layer/shard :01/01
node2:
layer/shard :01/02
node3:
layer/shard :01/03
node4:
layer/shard :01/02
node5:
layer/shard :01/03
node6:
layer/shard :01/01
#### 修改目录权限
--------------------
cd /data1 && chown -R clickhouse.clickhouse clickhouse
#### 创建进程守护
--------------------
# vim /etc/systemd/system/clickhouse_node1.service
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
After=network-online.target

[Service]
#Type=simple
Type=forking
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
# Fixed: config must point at node1's own directory (was /data1/clickhouse/ch_9000/config.xml,
# which does not match the node{1..6} layout created above); pid-file already used node1.
ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node1/config.xml --pid-file=/data1/clickhouse/node1/clickhouse-server.pid
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]
WantedBy=multi-user.target
##### 启动进程
----------------------
centos-1主机进行如下操作:
systemctl start clickhouse_node1.service
systemctl start clickhouse_node4.service
centos-2主机进行如下操作:
systemctl start clickhouse_node2.service
systemctl start clickhouse_node5.service
centos-3主机进行如下操作:
systemctl start clickhouse_node3.service
systemctl start clickhouse_node6.service
验证如下端口是否被监听:
netstat -anlp|grep 9003 (clickhouse tcp端口)
netstat -anlp|grep 9002 (clickhouse tcp端口)
netstat -anlp|grep 8123 (clickhouse http端口)
netstat -anlp|grep 8124 (clickhouse http端口)
netstat -anlp|grep 9009 (clickhouse 数据交互端口)
netstat -anlp|grep 9010 (clickhouse 数据交互端口)
##### 登录测试
--------------------
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 -m
#### 测试建表
create database testdb on cluster ch_cluster_all;
CREATE DATABASE testdb ON CLUSTER ch_cluster_all;
┌─host─────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ centos-3 │ 9000 │ 0 │ │ 5 │ 0 │
│ centos-2 │ 9000 │ 0 │ │ 4 │ 0 │
│ centos-1 │ 9002 │ 0 │ │ 3 │ 0 │
│ centos-3 │ 9002 │ 0 │ │ 2 │ 0 │
│ centos-1 │ 9000 │ 0 │ │ 1 │ 0 │
│ centos-2 │ 9002 │ 0 │ │ 0 │ 0 │
└──────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
%s/centos-1/cdh-slave-v04.julive.com/g
%s/centos-2/cdh-slave-v05.julive.com/g
%s/centos-3/cdh-slave-v06.julive.com/g
vim /etc/systemd/system/clickhouse_node6.service
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
After=network-online.target

[Service]
#Type=simple
Type=forking
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
# Fixed: pid-file pointed at node3 while the config is node6; each node must own its pid-file,
# otherwise two units on the same host clobber each other's pid.
ExecStart=/usr/bin/clickhouse-server --daemon --config=/data1/clickhouse/node6/config.xml --pid-file=/data1/clickhouse/node6/clickhouse-server.pid
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE

[Install]
WantedBy=multi-user.target
clickhouse-client -u default --password 123456 --port 9003 -h192.168.10.65 --query="show databases"
测试表mergeTree
----------------------------
-- MergeTree test table: partitioned by day of create_time,
-- primary key (id), sort key (id, sku_id).
create table t_order_mt(
id UInt32,
sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =MergeTree
partition by toYYYYMMDD(create_time)primary key (id)order by (id,sku_id);
-- First batch. MergeTree does not deduplicate, so repeated rows are kept.
insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(102,'sku_002',2000.00,'2020-06-01 11:00:00'),
(102,'sku_004',2500.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 13:00:00'),
(102,'sku_002',12000.00,'2020-06-01 13:00:00'),
(102,'sku_002',600.00,'2020-06-02 12:00:00');
-- Second, identical batch: creates a second data part with duplicate rows.
insert into t_order_mt values(101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(102,'sku_002',2000.00,'2020-06-01 11:00:00'),
(102,'sku_004',2500.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 13:00:00'),
(102,'sku_002',12000.00,'2020-06-01 13:00:00'),
(102,'sku_002',600.00,'2020-06-02 12:00:00');
-- Force an unscheduled merge so the parts from both inserts are combined.
optimize table t_order_mt final;
测试表ReplicatedMergeTree
----------------------------
-- ReplicatedMergeTree test table with a hard-coded ZooKeeper path and
-- replica name 'node1' (no macros; run the matching statement per node).
create table t_order_rep2(
id UInt32,sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =ReplicatedMergeTree('/clickhouse/table/01/t_order_rep','node1')
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id,sku_id);
-- Insert on one replica; rows replicate to the others via ZooKeeper.
insert into t_order_rep2 values
(101,'sku_001',1000.00,'2020-06-01 12:00:00'),
(102,'sku_002',2000.00,'2020-06-01 12:00:00'),
(103,'sku_004',2500.00,'2020-06-01 12:00:00'),
(104,'sku_002',2000.00,'2020-06-01 12:00:00'),
(105,'sku_003',600.00,'2020-06-02 12:00:00');
-- Replicated local table on every node of the cluster; the ZooKeeper path
-- uses the {layer}/{shard} macros from metrika.xml, replica name {replica}.
create table st_order_mt1 on cluster ch_cluster_all(
id UInt32,sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine =ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/st_order_mt','{replica}')
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id,sku_id);
-- Distributed table over st_order_mt1; hiveHash(sku_id) is the sharding key.
create table st_order_mt_all on cluster ch_cluster_all(
id UInt32,
sku_id String,
total_amount Decimal(16,2),
create_time Datetime)
engine = Distributed(ch_cluster_all,testdb, st_order_mt1,hiveHash(sku_id));
-- Fixed: insert targeted st_order_mt_all2, a table never created above;
-- the Distributed table is named st_order_mt_all.
insert into st_order_mt_all values
(201,'sku_001',1000.00,'2020-06-01 12:00:00') ,
(202,'sku_002',2000.00,'2020-06-01 12:00:00'),
(203,'sku_004',2500.00,'2020-06-01 12:00:00'),
(204,'sku_002',2000.00,'2020-06-01 12:00:00'),
(205,'sku_003',600.00,'2020-06-02 12:00:00');
Distributed(集群名称,库名,本地表名,分片键)分片键必须是整型数字,所以用hiveHash函数转换,也可以rand()