一、Clickhouse部署
https://clickhouse.tech/#quick-start
https://repo.yandex.ru/clickhouse/tgz/stable/
Ubuntu在线安装
# https://clickhouse.com/docs/zh/getting-started/install
apt-get install -y apt-transport-https ca-certificates dirmngr
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
echo "deb https://packages.clickhouse.com/deb stable main" | tee /etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you've set up a password.
下载离线安装包
# ubuntu2004下安装包下载
list=( clickhouse-client_21.5.6.6_all.deb clickhouse-common-static_21.5.6.6_amd64.deb clickhouse-server_21.5.6.6_all.deb CLICKHOUSE-KEY.GPG)
wget https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
for deb in ${list[@]} ; do
wget https://repo.yandex.ru/clickhouse/deb/stable/main/${deb}
done
# centos7.9下安装包下载
list=( clickhouse-client-21.5.6.6-2.noarch.rpm clickhouse-common-static-21.5.6.6-2.x86_64.rpm clickhouse-common-static-dbg-21.5.6.6-2.x86_64.rpm clickhouse-server-21.5.6.6-2.noarch.rpm CLICKHOUSE-KEY.GPG)
wget https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
for rpm in ${list[@]} ; do
wget https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/${rpm}
done
执行安装
# 如果安装失败则按提示顺序安装
dpkg -i *.deb
rpm -ivh *.rpm
cp /etc/clickhouse-server/config.xml /etc/clickhouse-server/config_sample.xml && \
cp /etc/clickhouse-server/users.xml /etc/clickhouse-server/users_sample.xml
# 卸载的方法
rpm -qa | grep clickhouse
rpm -e clickhouse-client-21.5.6.6-2.noarch
rpm -e clickhouse-server-21.5.6.6-2.noarch
rpm -e clickhouse-common-static-21.5.6.6-2.x86_64
rm -rf /etc/clickhouse /var/lib/clickhouse /var/log/clickhouse
备份和修改配置文件
rm -rf /etc/clickhouse-server/config.xml /etc/clickhouse-server/users.xml && \
cp /etc/clickhouse-server/config_sample.xml /etc/clickhouse-server/config.xml && \
sudo sed -i 's/<!-- <listen_host>::<\/listen_host> -->/<listen_host>::<\/listen_host>/g' /etc/clickhouse-server/config.xml &&\
sudo sed -i 's/<!-- <timezone>Europe\/Moscow<\/timezone> -->/<timezone>Asia\/Shanghai<\/timezone>/g' /etc/clickhouse-server/config.xml &&\
sudo sed -i 's/<max_concurrent_queries>100<\/max_concurrent_queries>/<max_concurrent_queries>1000<\/max_concurrent_queries>/g' /etc/clickhouse-server/config.xml &&\
sudo sed -i 's/<default_session_timeout>60<\/default_session_timeout>/<default_session_timeout>3600<\/default_session_timeout>/g' /etc/clickhouse-server/config.xml &&\
sudo sed -i 's/<max_connections>4096<\/max_connections>/<max_connections>10240<\/max_connections>/g' /etc/clickhouse-server/config.xml && \
sudo sed -i 's/<ttl>event_date + INTERVAL 30 DAY DELETE<\/ttl>/<ttl>event_date + INTERVAL 7 DAY DELETE<\/ttl>/g' /etc/clickhouse-server/config.xml && \
sudo sed -i 's/<trace_log>/<trace_log><ttl>event_date + INTERVAL 7 DAY DELETE<\/ttl>/g' /etc/clickhouse-server/config.xml && \
sudo sed -i 's/<query_thread_log>/<query_thread_log><ttl>event_date + INTERVAL 7 DAY DELETE<\/ttl>/g' /etc/clickhouse-server/config.xml && \
cp /etc/clickhouse-server/users_sample.xml /etc/clickhouse-server/users.xml && \
sudo sed -i 's/<load_balancing>random<\/load_balancing>/<load_balancing>random<\/load_balancing><distributed_ddl_task_timeout>3600<\/distributed_ddl_task_timeout><connect_timeout>3600<\/connect_timeout><receive_timeout>3600<\/receive_timeout><send_timeout>3600<\/send_timeout>/g' /etc/clickhouse-server/users.xml && \
sudo chown -R clickhouse:clickhouse /etc/clickhouse-server && \
sudo chown -R clickhouse:clickhouse /var/log/clickhouse-server
clickhouse-client
# --database / -d 登录的数据库
# --help 查看帮助信息
# --host / -h 服务端地址,默认是 localhost,如果修改 config.xml 中的 listen_host 值后可以使用此参数指定访问的 ip
# --multiline / -m 支持SQL多行语句,而不是回车就执行
# --multiquery / -n 允许一次执行多条 SQL 语句
# --password 登录的密码,默认值为空
# --port 服务端的 TCP 端口,默认值为 9000
# --query / -q 指定 SQL 语句
# --time / -t 打印每条 SQL 的执行时间
# --user / -u 登录的用户名,默认值为 default
# --version / -V 查看版本信息
手动修改配置文件
sudo -u clickhouse vi /etc/clickhouse-server/config.xml
<clickhouse_cluster01>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node1</host>
<port>9000</port>
<user>default</user>
<password>clickhouse</password>
</replica>
<replica>
<host>node2</host>
<port>9001</port>
<user>default</user>
<password>clickhouse</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node2</host>
<port>9000</port>
<user>default</user>
<password>clickhouse</password>
</replica>
<replica>
<host>node3</host>
<port>9001</port>
<user>default</user>
<password>clickhouse</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node3</host>
<port>9000</port>
<user>default</user>
<password>clickhouse</password>
</replica>
<replica>
<host>node1</host>
<port>9001</port>
<user>default</user>
<password>clickhouse</password>
</replica>
</shard>
</clickhouse_cluster01>
<zookeeper>
<node index="1">
<host>node1</host>
<port>2181</port>
</node>
<node index="2">
<host>node2</host>
<port>2181</port>
</node>
<node index="3">
<host>node3</host>
<port>2181</port>
</node>
</zookeeper>
<macros>
<cluster>clickhouse_cluster01</cluster>
<layer>01</layer>
<shard>01</shard>
<replica>clickhouse-cluster01-01-1</replica>
</macros>
<networks>
<ip>::/0</ip>
</networks>
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
复制到其他机器
scp /etc/clickhouse-server/config.xml root@node1:/etc/clickhouse-server/config.xml
创建多实例
no=2 &&\
sudo mkdir -p /var/lib/clickhouse$no &&\
sudo mkdir -p /var/lib/clickhouse$no/tmp &&\
sudo mkdir -p /var/lib/clickhouse$no/access &&\
sudo mkdir -p /var/lib/clickhouse$no/format_schemas &&\
sudo chown -R clickhouse:clickhouse /var/lib/clickhouse$no &&\
\
sudo cp -f /etc/systemd/system/clickhouse-server.service /etc/systemd/system/clickhouse-server$no.service &&\
sudo sed -i "s/config.xml/config$no.xml/g" /etc/systemd/system/clickhouse-server$no.service &&\
sudo sed -i "s/clickhouse-server.pid/clickhouse-server$no.pid/g" /etc/systemd/system/clickhouse-server$no.service &&\
sudo systemctl daemon-reload &&
sudo systemctl enable /etc/systemd/system/clickhouse-server$no.service &&\
\
sudo cp -f /etc/cron.d/clickhouse-server /etc/cron.d/clickhouse-server$no &&\
sudo sed -i "s/clickhouse-server/clickhouse-server$no/g" /etc/cron.d/clickhouse-server$no &&\
\
sudo cp -f /etc/init.d/clickhouse-server /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/CLICKHOUSE_LOGDIR=\/var\/log\/clickhouse-server/CLICKHOUSE_LOGDIR=\/var\/log\/clickhouse-server$no/g" /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/CLICKHOUSE_DATADIR=\/var\/lib\/clickhouse/CLICKHOUSE_DATADIR=\/var\/lib\/clickhouse$no/g" /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/CLICKHOUSE_CRONFILE=\/etc\/cron.d\/clickhouse-server/CLICKHOUSE_CRONFILE=\/etc\/cron.d\/clickhouse-server$no/g" /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/CLICKHOUSE_CONFIG=\$CLICKHOUSE_CONFDIR\/config.xml/CLICKHOUSE_CONFIG=\$CLICKHOUSE_CONFDIR\/config$no.xml/g" /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/LOCKFILE=\$LOCALSTATEDIR\/\$PROGRAM/LOCKFILE=\$LOCALSTATEDIR\/\${PROGRAM}$no/g" /etc/init.d/clickhouse-server$no &&\
sudo sed -i "s/CLICKHOUSE_PIDDIR=\/var\/run\/\$PROGRAM/CLICKHOUSE_PIDDIR=\/var\/run\/\${PROGRAM}$no/g" /etc/init.d/clickhouse-server$no &&\
\
sudo cp -f /etc/clickhouse-server/config.xml /etc/clickhouse-server/config$no.xml &&\
sudo chown clickhouse:clickhouse /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<http_port>8123<\/http_port>/<http_port>8124<\/http_port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<tcp_port>9000<\/tcp_port>/<tcp_port>9001<\/tcp_port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<mysql_port>9004<\/mysql_port>/<mysql_port>9014<\/mysql_port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<postgresql_port>9005<\/postgresql_port>/<postgresql_port>9015<\/postgresql_port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<interserver_http_port>9009<\/interserver_http_port>/<interserver_http_port>9019<\/interserver_http_port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<port>9363<\/port>/<port>9364<\/port>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<path>\/var\/lib\/clickhouse\/<\/path>/<path>\/var\/lib\/clickhouse2\/<\/path>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<tmp_path>\/var\/lib\/clickhouse\/tmp\/<\/tmp_path>/<tmp_path>\/var\/lib\/clickhouse2\/tmp\/<\/tmp_path>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<user_files_path>\/var\/lib\/clickhouse\/user_files\/<\/user_files_path>/<user_files_path>\/var\/lib\/clickhouse2\/user_files\/<\/user_files_path>/g" /etc/clickhouse-server/config$no.xml &&\
sudo -u clickhouse sed -i "s/<path>\/var\/lib\/clickhouse\/access\/<\/path>/<path>\/var\/lib\/clickhouse2\/access\/<\/path>/g" /etc/clickhouse-server/config$no.xml
手动修改里的分片号和副本号
sudo -u clickhouse vi /etc/clickhouse-server/config2.xml
二、ClickHouse操作
1.1 备份还原库表
-- 执行sql备份:目录为/var/lib/clickhouse/shadow/
alter table vehicle_gas.mec_mileage freeze
-- 转移文件
mv /var/lib/clickhouse/shadow/* .
-- 防止下次备份时忘了删除
rm -rf /var/lib/clickhouse/shadow/*
-- 准备还原
cp -rl ~/backup/vehicle_gas_mec_mileage/1/store/542/5420cdbd-a907-4514-9040-74ff97012715/* /var/lib/clickhouse/data/vehicle_gas/mec_mileage/detached/
chown clickhouse:clickhouse -R /var/lib/clickhouse/data/vehicle_gas/mec_mileage/detached/
-- 还原指定分区
echo 'alter table vehicle_gas.mec_mileage attach partition 197001' | clickhouse-client -h localhost -u default --password clickhouse
-- https://blog.csdn.net/qq_35423190/article/details/110238007
-- 还可以使用 clickhouse-backup
-- https://github.com/AlexAkulov/clickhouse-backup/
-- rpm -ivh clickhouse-backup-1.3.1-1.x86_64.rpm
1.2 创建本地复制表和分布式表
参考:https://support.huaweicloud.com/cmpntguide-mrs/mrs_01_2398.html
clickhouse-client -h datanode1 --port 9000 -m -u default --password clickhouse -q "select 1"
clickhouse-client -h 127.0.0.1 --port 9000 -m -u default --password clickhouse --multiquery < ./ck.sql
echo 'select * from dm.warn_car_data' | curl 'http://datanode1:8123/?user=default&password=clickhouse' -d @-
clickhouse-client -h web1 --port 9000 -m -u default --password clickhouse -q \
"select parent_car_type name, count(car_id) count from dm.car_info where parent_car_type != '' and parent_car_type is not null group by parent_car_type;"
watch -n 1 "clickhouse-client -h web1 --port 9000 -m -u default --password clickhouse -q \
\"select * from system.errors;\""
watch -n 1 "clickhouse-client -h web1 --port 9000 -m -u default --password clickhouse -q \ \"
select affiliate_co name, count(car_id) count from dm.car_info where obd_code != '' \
and obd_code is not null and affiliate_co != '' and affiliate_co is not null group by affiliate_co;\""
1.3 导出数据
-- 方法一: 远程导出命令,默认分割符是tab:
echo 'select now()' | curl 'http://web1:8123/?user=default&password=clickhouse' -d @- > result.sql
echo 'select now()' | curl 'http://web1:8123/?user=default&password=clickhouse' -d @-
curl 'http://datanode1:8123/?user=default&password=clickhouse' -d 'select now()' > result.sql
-- 方法二
clickhouse-client -h web1 --port 9000 -m -u default --password clickhouse \
--query="select * from vehicle_gas.mec_run limit 10 FORMAT CSVWithNames" > vehicle_gas_mec_run.csv
1.4 集群分布式库表操作
-- 使用ReplicatedMergeTree引擎创建复制表
drop table if exists default.test;
drop table if exists default.test_0 on cluster '{cluster}';
create table if not exists default.test_0 on cluster '{cluster}'
(
data String(64) comment '数据',
create_time datetime
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/default/test_0', '{replica}')
PARTITION BY toYYYYMM(create_time)
ORDER BY toYYYYMM(create_time)
SETTINGS index_granularity = 8192;
-- 查看zk状态
/opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh status
-- replicas 已存在,执行删除
/opt/apache-zookeeper-3.6.3-bin/bin/zkCli.sh -server 127.0.0.1:2181
[zk: 127.0.0.1:2181(CONNECTED) 1] ls /
[zk: 127.0.0.1:2181(CONNECTED) 1] deleteall /clickhouse
-- 使用Distributed引擎创建分布式表
create table if not exists test
on cluster '{cluster}' AS test_0
ENGINE = Distributed('{cluster}', default, test_0, rand());
-- 插入数据到本地表
insert into test values
('path1','2020-09-01'),
('path2','2020-09-02'),
('path3','2020-09-03'),
('path4','2020-09-04'),
('path5','2020-09-05'),
('path6','2020-09-06'),
('path7','2020-09-07'),
('path8','2020-09-08'),
('path9','2020-09-09'),
('path10','2020-09-10'),
('path11','2020-09-11'),
('path12','2020-09-12'),
('path13','2020-09-13'),
('path14','2020-09-14'),
('path15','2020-09-15'),
('path16','2020-09-16'),
('path17','2020-09-17'),
('path18','2020-09-18');
-- 查询本地表信息
select * from test_0;
-- 查询Distributed分布式表
select * from test;
-- 其他
ALTER TABLE test DELETE where 1=1
rm -rf /clickhouse/tables/01-01/default/test_clusters_ha/replicas/clickhouse-cluster01-01-1 \
/clickhouse/tables/01-01/default/test_clusters_ha/replicas/clickhouse-cluster01-01-2 \
/clickhouse/tables/01-02/default/test_clusters_ha/replicas/clickhouse-cluster01-02-1 \
/clickhouse/tables/01-02/default/test_clusters_ha/replicas/clickhouse-cluster01-02-2 \
/clickhouse/tables/01-03/default/test_clusters_ha/replicas/clickhouse-cluster01-03-1 \
/clickhouse/tables/01-03/default/test_clusters_ha/replicas/clickhouse-cluster01-03-2 \
/opt/zookeeper/log/* /opt/zookeeper/data/ver*
-- 查看数据库容量,以测试数据为参考
select
sum(rows) as "总行数",
formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小",
formatReadableSize(sum(data_compressed_bytes)) as "压缩大小",
round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率"
from system.parts;
┌──────总行数─┬─原始大小──┬─压缩大小─┬─压缩率─┐
│ 169851144587 │ 30.76 TiB │ 4.51 TiB │ 15 │
└─────────────┴───────────┴──────────┴────────┘
1 rows in set. Elapsed: 1.462 sec. Processed 90.35 thousand rows, 42.81 MB (63.76 thousand rows/s., 30.60 MB/s.)
-- 查询test表,2019年10月份的数据容量
select
table as "表名",
sum(rows) as "总行数",
formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小",
formatReadableSize(sum(data_compressed_bytes)) as "压缩大小",
round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率"
from system.parts
-- 根据实际情况加查询条件
where table in('test')
and partition like '2019-10-%'
group by table;
┌─────总行数─┬─原始大小───┬─压缩大小──┬─压缩率─┐
│ 3015199559 │ 727.83 GiB │ 60.34 GiB │ 8 │
└────────────┴────────────┴───────────┴────────┘
1 rows in set. Elapsed: 0.227 sec. Processed 3.52 thousand rows, 1.70 MB (15.48 thousand rows/s., 7.50 MB/s.)
错误解决:
1、clickhouse_driver.errors.UnexpectedPacketFromServerError: Code: 102. Unexpected packet from server None:None (expected Hello or Exception, got Unknown packet)