Clickhouse Centos74 rpm 安装
安装环境
华为云主机双cpu双核心,4G内存,centos7.4
192.168.1.223 ecs-app-0001.novalocal ecs-app-0001
192.168.1.240 ecs-app-0002.novalocal ecs-app-0002
192.168.1.197 ecs-app-0003.novalocal ecs-app-0003
详细命令请参考:
https://github.com/super-sponge/clickhose_deploy
1. ansible 安装及配置
yum install -y ansible
1.1 去掉 /etc/ansible/ansible.cfg host_key_checking 前面的注释
host_key_checking = False
1.2 编辑ansible host文件 (root 密码根据实际情况填写)
[clickhouse]
192.168.1.223 ansible_ssh_user=root ansible_ssh_pass=****
192.168.1.240 ansible_ssh_user=root ansible_ssh_pass=****
192.168.1.197 ansible_ssh_user=root ansible_ssh_pass=****
2. 安装clickhouse
2.1 安装clickhouse
ansible clickhouse -m copy -a 'src=Altinity_clickhouse.repo dest=/etc/yum.repos.d'
ansible all -m yum -a "name=clickhouse-server state=present"
ansible all -m yum -a "name=clickhouse-client state=present"
备注:
也可以手工在每台主机上执行
curl -s https://packagecloud.io/install/repositories/Altinity/clickhouse/script.rpm.sh | bash
yum install -y clickhouse-server
yum install -y clickhouse-client
2.2 配置
配置config.xml 文件
vi /etc/clickhouse-server/config.xml
<path>/data/clickhouse/</path>
<tmp_path>/data/clickhouse/tmp/</tmp_path>
<interserver_http_host>ecs-app-0001.novalocal</interserver_http_host>
<listen_host>0.0.0.0</listen_host>
备注:
interserver_http_host 需要配置为实际的主机名
ansible all -m copy -a 'src=config.xml dest=/etc/clickhouse-server mode=0644'
进入每个主机更改interserver_http_host
配置分布式环境users.xml 和metrika.xml(实际内容请参考附件)
ansible all -m copy -a 'src=users.xml dest=/etc/clickhouse-server mode=0644'
ansible all -m copy -a 'src=metrika.xml dest=/etc/metrika.xml mode=0644'
备注:
metrika.xml 中(参考网上的资料,我对每个主机设置为不同的值,这个上面批量拷贝后,手工更改)
<!-- 本节点副本名称(这里无用) -->
<macros>
<replica>ck1</replica>
</macros>
3. 安装 zookeeper 以及jdk
请参考附件
4. 启动与停止
ansible clickhouse -m service -a "name=clickhouse-server state=started"
ansible clickhouse -m service -a "name=clickhouse-server state=stopped"
5. 测试
表创建
./create.sh
装载数据
./load.sh
cat create.sh
#!/bin/bash
# create.sh — create the benchmark tables on every ClickHouse node by
# piping create.sql to each host, so the local tables exist cluster-wide
# before the Distributed tables are used.
set -euo pipefail

# Resolve the directory this script lives in, regardless of invocation cwd.
base_dir=$(cd "$(dirname "$0")" && pwd)

for i in 1 2 3; do
  # -n allows the multi-statement SQL file to be executed in one session.
  # NOTE(review): password is hardcoded here (tutorial style) — prefer an
  # env var or config file in real deployments.
  clickhouse-client --host="ecs-app-000${i}.novalocal" --password=6lYaUiFi -n < "$base_dir/create.sql"
done
load 数据
load.sh
#!/bin/bash
# load.sh — generate test data with dbgen and load it into ClickHouse
# through the Distributed tables (customerd, lineorderd, partd, supplierd)
# on node 1, which spreads the rows across the shards.
set -euo pipefail

base_dir=$(cd "$(dirname "$0")" && pwd)
cd "$base_dir/dss" || exit 1

# Scale factor 10; -T a generates all tables.
./dbgen -s 10 -T a

# The customer table schema has a synthetic C_FAKEDATE column at the end;
# append a constant date to every generated row so the CSV column count matches.
sed -i 's/$/"1999-01-01"/' customer.tbl

# Feed each .tbl file to clickhouse-client via stdin redirection
# (no need for `cat file |`).
clickhouse-client --host=ecs-app-0001.novalocal --password=6lYaUiFi --query "INSERT INTO customerd FORMAT CSV" < customer.tbl
clickhouse-client --host=ecs-app-0001.novalocal --password=6lYaUiFi --query "INSERT INTO lineorderd FORMAT CSV" < lineorder.tbl
clickhouse-client --host=ecs-app-0001.novalocal --password=6lYaUiFi --query "INSERT INTO partd FORMAT CSV" < part.tbl
clickhouse-client --host=ecs-app-0001.novalocal --password=6lYaUiFi --query "INSERT INTO supplierd FORMAT CSV" < supplier.tbl
ntp 时间同步配置
ansible all -m yum -a "name=chrony state=present"
附件:
1. 安装zookeeper 和 jdk
jdk 安装 (本次安装到全局环境)
ansible clickhouse -m file -a 'path=/usr/java state=directory recurse=yes mode=0755'
ansible clickhouse -m unarchive -a "src=jdk-8u191-linux-x64.tar.gz dest=/usr/java"
vi jdk.sh
JAVA_HOME=/usr/java/jdk1.8.0_191
CLASSPATH=$JAVA_HOME/lib/
PATH=$PATH:$JAVA_HOME/bin
export PATH JAVA_HOME CLASSPATH
ansible clickhouse -m copy -a "src=jdk.sh dest=/etc/profile.d"
zookeeper 安装
ansible clickhouse -m group -a 'name=hadoop state=present'
ansible clickhouse -m user -a 'name=zookeeper shell=/bin/bash group=hadoop state=present'
ansible clickhouse -m file -a 'path=/home/zookeeper/app state=directory recurse=yes owner=zookeeper group=hadoop mode=0744'
ansible clickhouse -m unarchive -a "src=zookeeper-3.4.13.tar.gz dest=/home/zookeeper/app owner=zookeeper group=hadoop"
ansible clickhouse -m file -a 'path=/data/zookeeper/data state=directory recurse=yes owner=zookeeper group=hadoop mode=0744'
ansible clickhouse -m file -a 'path=/data/zookeeper/datalog state=directory recurse=yes owner=zookeeper group=hadoop mode=0744'
vi zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper/data
dataLogDir=/data/zookeeper/datalog
clientPort=2181
server.0=ecs-app-0001.novalocal:2888:3888
server.1=ecs-app-0002.novalocal:2888:3888
server.2=ecs-app-0003.novalocal:2888:3888
ansible clickhouse -m copy -a 'src=zoo.cfg dest=/home/zookeeper/app/zookeeper-3.4.13/conf owner=zookeeper group=hadoop mode=0644'
vi zk.sh
#!/bin/bash
# zk.sh — give each host in the [clickhouse] inventory group a unique
# ZooKeeper id by writing 0/1/2 to a local `myid` file and copying it to
# /data/zookeeper/data on the matching host.
set -euo pipefail

for id in 0 1 2; do
  printf '%s\n' "$id" > myid
  # Quote the host pattern: unquoted, `clickhouse[$id]` is a shell glob and
  # could match a local file instead of being passed to ansible verbatim.
  ansible "clickhouse[$id]" --become-user zookeeper -b -m copy -a "src=myid dest=/data/zookeeper/data owner=zookeeper group=hadoop"
done
启动(下面启动没有成功,直接登录每台主机启动)
ansible clickhouse --become-user zookeeper -b -m command -a "/home/zookeeper/app/zookeeper-3.4.13/bin/zkServer.sh start"
停止
ansible clickhouse --become-user zookeeper -b -m command -a "/home/zookeeper/app/zookeeper-3.4.13/bin/zkServer.sh stop"
验证
./zkCli.sh
create /zk_test my_data
2. metrika.xml
<yandex>
<!-- Cluster definition: three single-replica shards. The name
     bip_ck_cluster is referenced by the Distributed tables in create.sql. -->
<clickhouse_remote_servers>
<bip_ck_cluster>
<!-- Data shard 1 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>ecs-app-0001.novalocal</host>
<port>9000</port>
<user>default</user>
<password>6lYaUiFi</password>
</replica>
</shard>
<!-- Data shard 2 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>ecs-app-0002.novalocal</host>
<port>9000</port>
<user>default</user>
<password>6lYaUiFi</password>
</replica>
</shard>
<!-- Data shard 3 -->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>ecs-app-0003.novalocal</host>
<port>9000</port>
<user>default</user>
<password>6lYaUiFi</password>
</replica>
</shard>
</bip_ck_cluster>
</clickhouse_remote_servers>
<!-- Replica name of this node (unused in this non-replicated setup);
     the guide above sets a different value per host after copying. -->
<macros>
<replica>ck1</replica>
</macros>
<!-- Networks substitution; consumed by users.xml via incl="networks".
     (Original note: "seems duplicated".) -->
<networks>
<ip>::/0</ip>
</networks>
<!-- ZooKeeper ensemble (see the zookeeper install section above). -->
<zookeeper-servers>
<node index="1">
<host>ecs-app-0001.novalocal</host>
<port>2181</port>
</node>
<node index="2">
<host>ecs-app-0002.novalocal</host>
<port>2181</port>
</node>
<node index="3">
<host>ecs-app-0003.novalocal</host>
<port>2181</port>
</node>
</zookeeper-servers>
<!-- Data compression: lz4 for parts meeting the min_part_size /
     min_part_size_ratio thresholds below. -->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
3. users.xml
<?xml version="1.0"?>
<yandex>
<profiles>
<!-- Profile for read-write users -->
<default>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
</default>
<!-- Profile for read-only users (readonly=1 below forbids writes;
     the original comment mistakenly said "write-only") -->
<readonly>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
<readonly>1</readonly>
</readonly>
</profiles>
<!-- Quotas: all limits set to 0 = unlimited within each 1-hour interval -->
<quotas>
<!-- Name of quota. -->
<default>
<interval>
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
<users>
<!-- Read-write user (uses the default profile) -->
<default>
<password_sha256_hex>967f3bf355dddfabfca1c9f5cab39352b2ec1cd0b05f9e1e6b8f629705fe7d6e</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
<ip>0.0.0.0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
<!-- Read-only user (uses the readonly profile) -->
<ck>
<password_sha256_hex>967f3bf355dddfabfca1c9f5cab39352b2ec1cd0b05f9e1e6b8f629705fe7d6e</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
<ip>0.0.0.0</ip>
</networks>
<profile>readonly</profile>
<quota>default</quota>
</ck>
</users>
</yandex>
4. create.sql
-- create.sql — (re)create the dbgen benchmark tables.
-- Local MergeTree tables (lineorder, customer, part, supplier) hold data
-- on each node; the *d tables are Distributed wrappers over bip_ck_cluster
-- (defined in metrika.xml) that fan inserts/reads across all shards.
-- NOTE(review): the drops run unconditionally — rerunning wipes existing data.
drop table if exists lineorder;
drop table if exists customer;
drop table if exists part;
drop table if exists supplier;
drop table if exists lineorderd;
drop table if exists customerd;
drop table if exists partd;
drop table if exists supplierd;
-- Legacy MergeTree syntax: MergeTree(date_column, (primary key), index_granularity).
CREATE TABLE lineorder (
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY String,
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE String
)Engine=MergeTree(LO_ORDERDATE,(LO_ORDERKEY,LO_LINENUMBER),8192);
-- C_FAKEDATE is a synthetic partition-date column; load.sh appends a
-- constant "1999-01-01" to each generated customer row to fill it.
CREATE TABLE customer (
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY String,
C_NATION String,
C_REGION String,
C_PHONE String,
C_MKTSEGMENT String,
C_FAKEDATE Date
)Engine=MergeTree(C_FAKEDATE,(C_CUSTKEY),8192);
CREATE TABLE part (
P_PARTKEY UInt32,
P_NAME String,
P_MFGR String,
P_CATEGORY String,
P_BRAND String,
P_COLOR String,
P_TYPE String,
P_SIZE UInt8,
P_CONTAINER String,
P_FAKEDATE Date
)Engine=MergeTree(P_FAKEDATE,(P_PARTKEY),8192);
CREATE TABLE supplier (
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY String,
S_NATION String,
S_REGION String,
S_PHONE String,
S_FAKEDATE Date
)Engine=MergeTree(S_FAKEDATE,(S_SUPPKEY),8192);
-- Distributed(cluster, database, local_table, sharding_key):
-- rand() spreads inserted rows uniformly across the three shards.
CREATE TABLE lineorderd AS lineorder ENGINE = Distributed(bip_ck_cluster, default, lineorder, rand());
CREATE TABLE customerd AS customer ENGINE = Distributed(bip_ck_cluster, default, customer, rand());
CREATE TABLE partd AS part ENGINE = Distributed(bip_ck_cluster, default, part, rand());
CREATE TABLE supplierd AS supplier ENGINE = Distributed(bip_ck_cluster, default, supplier, rand());