本例子支持灵活配置 m*n(m 个分片 × n 个副本)的 ClickHouse 集群
使用方法
分为执行脚本和配置文件两个部分
配置文件config.toml
所有机器的用户名和密码必须相同(脚本使用同一组凭据通过 SSH 登录每一台机器)
# Number of shards
shard_num = 2
# Number of replicas per shard
replica_num = 2
# IP list of the ClickHouse machines (shard_num * replica_num entries,
# listed shard by shard)
host_list = ["10.0.0.2","10.0.0.3","10.0.0.4","10.0.0.5"]
# ZooKeeper IP or domain-name list
zk_host_list = ["10.0.0.6","10.0.0.7","10.0.0.8"]
# SSH user name (must be the same on every machine)
user = "root"
# SSH password (must be the same on every machine)
password = "xxx"
# Remote directory the generated files are uploaded into; the script reads
# this key, and an empty string makes it fall back to "/etc/"
remote_path = "/etc/"
脚本config_gen.py
import os
import shlex
import sys

import paramiko
import toml
# install pip: curl https://bootstrap.pypa.io/get-pip.py | python3
# install paramiko: pip install paramiko toml
# Template for each host's metrika.xml. The @cluster@, @zookeeper@, @shard@
# and @replica@ placeholders are substituted by generate_cluster,
# generate_zookeeper and generate_metrika_config_file respectively.
YANDEX = """<yandex>
<clickhouse_remote_servers>
<default>
@cluster@
</default>
</clickhouse_remote_servers>
<zookeeper-servers>
@zookeeper@
</zookeeper-servers>
<macros>
<shard>@shard@</shard>
<replica>@replica@</replica>
</macros>
</yandex>"""
# Template for one <shard> element; @servers@ is replaced with the
# concatenated REPLICA entries belonging to that shard.
SHARD = """
<shard>
<internal_replication>true</internal_replication>
@servers@
</shard>
"""
# Template for one <replica> element; @host@ is replaced with the hostname
# returned by get_hostname. The port is fixed at 9000.
REPLICA = """<replica>
<host>@host@</host>
<port>9000</port>
</replica>
"""
# Template for one ZooKeeper <node> element; @index@ is a 1-based position
# and @host@ an entry of zk_host_list. The port is fixed at 2181.
NODE = """<node index="@index@">
<host>@host@</host>
<port>2181</port>
</node>
"""
# Content written verbatim to listen_host.xml by generate_listen_config_file.
HOST = """<yandex>
<listen_host>::</listen_host>
<listen_host>0.0.0.0</listen_host>
<listen_try>1</listen_try>
</yandex>
"""
def get_hostname(host, user, password):
    """Return the short hostname of a remote machine.

    Connects to *host* on port 22 with the given credentials, runs
    ``hostname -s`` and returns its output with newlines removed.

    Args:
        host: IP address or name of the machine to query.
        user: SSH user name.
        password: SSH password.

    Returns:
        The short hostname as a string.
    """
    ssh = paramiko.SSHClient()
    # Unknown host keys are accepted automatically, as in the rest of the script.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, 22, user, password)
        _stdin, stdout, _stderr = ssh.exec_command("hostname -s")
        return stdout.read().decode().replace("\n", "")
    finally:
        # Always release the connection; previously it was leaked on every call.
        ssh.close()
def generate_cluster(replica_num, host_list, user, password):
    """Build the metrika.xml text with the cluster section filled in.

    Hosts are grouped in list order into shards of *replica_num* replicas.
    Each host contributes one REPLICA entry keyed by its short hostname, each
    group is wrapped in a SHARD element, and the result replaces ``@cluster@``
    in the YANDEX template. The other placeholders are left untouched.

    Args:
        replica_num: Number of replicas per shard; must be at least 1.
        host_list: Machine addresses, listed shard by shard.
        user: SSH user name used to look up hostnames.
        password: SSH password used to look up hostnames.

    Returns:
        The YANDEX template with ``@cluster@`` substituted.

    Raises:
        ValueError: If *replica_num* is smaller than 1.
    """
    if replica_num < 1:
        raise ValueError("replica_num must be a positive integer")

    shards = []
    # Chunking by replica_num guarantees that the final group is emitted too;
    # the earlier index-based bookkeeping dropped the last shard whenever
    # replica_num was 1.
    for start in range(0, len(host_list), replica_num):
        replicas = "".join(
            REPLICA.replace("@host@", get_hostname(host, user, password))
            for host in host_list[start:start + replica_num]
        )
        shards.append(SHARD.replace("@servers@", replicas))
    return YANDEX.replace("@cluster@", "".join(shards))
def generate_zookeeper(zk_host_list, config):
    """Fill the ZooKeeper section of the configuration text.

    One NODE entry is rendered per ZooKeeper host, numbered from 1 in list
    order, and the concatenation replaces ``@zookeeper@`` in *config*.

    Args:
        zk_host_list: ZooKeeper IPs or domain names.
        config: Configuration text containing the ``@zookeeper@`` placeholder.

    Returns:
        *config* with ``@zookeeper@`` substituted.
    """
    zk_nodes = "".join(
        NODE.replace("@host@", zk_host).replace("@index@", str(position))
        for position, zk_host in enumerate(zk_host_list, start=1)
    )
    return config.replace("@zookeeper@", zk_nodes)
def generate_metrika_config_file(shard_num, replica_num, host_list, config, user, password):
    """Write one metrika.xml file per host with its macros filled in.

    For each of the first ``shard_num * replica_num`` hosts, ``@shard@`` is
    replaced with the 0-based shard number and ``@replica@`` with the host's
    short hostname. The result is written next to the script as
    ``metrika.xml-<host>``, for example ``metrika.xml-10.0.0.2``.

    Args:
        shard_num: Number of shards.
        replica_num: Number of replicas per shard.
        host_list: Machine addresses, listed shard by shard.
        config: Configuration text containing the macro placeholders.
        user: SSH user name used to look up hostnames.
        password: SSH password used to look up hostnames.

    Raises:
        ValueError: If *host_list* has fewer than ``shard_num * replica_num``
            entries.
    """
    needed = shard_num * replica_num
    # Fail before writing anything instead of raising IndexError half-way
    # through and leaving a partial set of files behind.
    if needed > len(host_list):
        raise ValueError(
            "host_list has %d entries but shard_num * replica_num is %d"
            % (len(host_list), needed)
        )

    for index, host in enumerate(host_list[:needed]):
        shard = index // replica_num
        rendered = config.replace("@shard@", str(shard))
        rendered = rendered.replace("@replica@", get_hostname(host, user, password))
        target = sys.path[0] + "/metrika.xml" + "-" + host
        # The context manager closes the file even if the write fails.
        with open(target, "w", encoding="utf-8") as out:
            out.write(rendered)
def generate_listen_config_file():
    """Write the HOST template to ``listen_host.xml`` next to the script.

    The file holds the listen_host settings that are later uploaded to every
    machine.
    """
    # The context manager closes the file even if the write fails.
    with open(sys.path[0] + "/listen_host.xml", "w", encoding="utf-8") as out:
        out.write(HOST)
def push_config_file(host_list, remote_path, user, password):
    """Upload metrika.xml and listen_host.xml to every host.

    For each host the local ``metrika.xml-<host>`` is uploaded as
    ``<remote_path>metrika.xml`` and ``listen_host.xml`` as
    ``<remote_path>clickhouse-server/config.d/listen_host.xml``. The config.d
    directory is created first.

    Args:
        host_list: Machine addresses to push to.
        remote_path: Remote base directory; an empty string means ``/etc/``.
            A missing trailing slash is added.
        user: SSH user name.
        password: SSH password.

    Raises:
        RuntimeError: If the remote directory cannot be created.
    """
    if remote_path == "":
        remote_path = "/etc/"
    elif not remote_path.endswith("/"):
        # Paths are built by concatenation, so the separator must be present.
        remote_path += "/"

    # Create the directory under remote_path so it matches the upload target.
    config_dir = remote_path + "clickhouse-server/config.d"
    listen_host_path = sys.path[0] + "/listen_host.xml"

    for host in host_list:
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh.connect(host, 22, user, password)
            _stdin, stdout, stderr = ssh.exec_command(
                "mkdir -p " + shlex.quote(config_dir)
            )
            # exec_command returns immediately; wait for mkdir to finish so the
            # upload below cannot race against directory creation.
            if stdout.channel.recv_exit_status() != 0:
                raise RuntimeError(
                    "host " + host + " failed to create " + config_dir + ": "
                    + stderr.read().decode()
                )
            sftp = ssh.open_sftp()
            try:
                metrika_path = sys.path[0] + "/metrika.xml-" + host
                sftp.put(metrika_path, remote_path + "metrika.xml")
                sftp.put(listen_host_path, config_dir + "/listen_host.xml")
            finally:
                sftp.close()
        finally:
            # Release the connection for this host even when an upload fails.
            ssh.close()
def install_and_start_clickhouse(host, user, password):
    """Install ClickHouse with yum on *host* and restart the server.

    Runs three shell steps chained with ``&&``: register the Altinity
    packagecloud repository, ``yum install`` the server and client packages,
    and restart ``clickhouse-server`` through its init script. The outcome is
    printed to stdout.

    Args:
        host: Machine address.
        user: SSH user name.
        password: SSH password.
    """
    # NOTE(review): the first step pipes a downloaded script straight into
    # bash on the remote machine; this trusts packagecloud.io completely.
    cmd = " && ".join((
        "curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | bash",
        "yum install -y clickhouse-server clickhouse-client",
        "/etc/init.d/clickhouse-server restart",
    ))
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, 22, user, password)
        _stdin, stdout, stderr = ssh.exec_command(cmd)
        # Drain both streams before asking for the exit status so the remote
        # command is not blocked on a full channel window.
        stdout.read()
        errmsg = stderr.read().decode()
        exit_status = stdout.channel.recv_exit_status()
    finally:
        ssh.close()

    # The exit status decides success: yum and curl write warnings to stderr
    # even when everything worked, so non-empty stderr alone is not a failure.
    if exit_status != 0:
        print("host " + host + " install failed (exit status "
              + str(exit_status) + "): " + errmsg)
        return
    if errmsg != "":
        print("host " + host + " install warnings: " + errmsg)
    print("host " + host + " install finished and started")
def _resolve_credentials(user, password):
    """Return SSH credentials, falling back to module-level ``user``/``password``.

    Keeps older calls that omit the credentials working when the script body
    has defined them as module globals.
    """
    module_globals = globals()
    if user is None:
        user = module_globals.get("user")
    if password is None:
        password = module_globals.get("password")
    if user is None:
        raise ValueError("SSH user was not provided")
    return user, password


def get_hosts(host_list, user=None, password=None):
    """Collect the ``<ip> <hostname>`` lines to append to /etc/hosts.

    Each host is asked for its short hostname and its non-loopback IPv4
    addresses as reported by ``ifconfig``.

    Args:
        host_list: Machine addresses to query.
        user: SSH user name; defaults to the module-level ``user``.
        password: SSH password; defaults to the module-level ``password``.

    Returns:
        One ``"<ip> <hostname>\\n"`` line per host, concatenated.
    """
    user, password = _resolve_credentials(user, password)
    lines = []
    for host in host_list:
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh.connect(host, 22, user, password)
            _stdin, stdout, _stderr = ssh.exec_command("hostname -s")
            hostname = stdout.read().decode().replace("\n", "")
            _stdin, stdout, _stderr = ssh.exec_command("ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d \"addr:\"")
            addresses = stdout.read().decode().split()
        finally:
            ssh.close()
        # A machine with several interfaces reports several addresses; gluing
        # them together produced an invalid entry. Prefer the address we
        # connected to, then the first reported one, and fall back to the
        # configured host when ifconfig printed nothing.
        if host in addresses or not addresses:
            ip = host
        else:
            ip = addresses[0]
        lines.append(ip + " " + hostname + "\n")
    return "".join(lines)


def add_hosts(host, host_list_str, user=None, password=None):
    """Append *host_list_str* to /etc/hosts on *host*.

    Args:
        host: Machine address.
        host_list_str: Text to append, one hosts entry per line.
        user: SSH user name; defaults to the module-level ``user``.
        password: SSH password; defaults to the module-level ``password``.
    """
    user, password = _resolve_credentials(user, password)
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, 22, user, password)
        add_host_str = 'echo "{}" >> /etc/hosts'.format(host_list_str)
        _stdin, stdout, _stderr = ssh.exec_command(add_host_str)
        # Wait for the append to finish before the connection is closed.
        stdout.channel.recv_exit_status()
    finally:
        ssh.close()


def start_clickhouse_cluster(host_list, user, password):
    """Prepare /etc/hosts on every machine, then install and start ClickHouse.

    Every host receives a ``127.0.0.1 <own hostname>`` line followed by the
    entries of all cluster members, after which ClickHouse is installed and
    restarted on it.

    Args:
        host_list: Machine addresses of the cluster.
        user: SSH user name.
        password: SSH password.
    """
    host_list_str = get_hosts(host_list, user, password)
    for host in host_list:
        hostname = get_hostname(host, user, password)
        add_hosts(host, "127.0.0.1 " + hostname + "\n" + host_list_str, user, password)
        install_and_start_clickhouse(host, user, password)
def read_config():
    """Load ``config.toml`` from the current working directory.

    When the file is missing, the message is shown through ``input`` (so the
    user has to press Enter) and the process exits with status -1. A leading
    UTF-8 byte-order mark is dropped before the text is parsed.

    Returns:
        The parsed TOML document as a dictionary.
    """
    config_file = 'config.toml'
    if os.path.exists(config_file):
        with open(config_file, mode='rb') as handle:
            raw = handle.read()
        # Drop the UTF-8 BOM, if there is one, ahead of decoding.
        payload = raw[3:] if raw.startswith(b'\xef\xbb\xbf') else raw
        return toml.loads(payload.decode('utf8'))
    input(config_file + ' not found')
    sys.exit(-1)
if __name__ == "__main__":
    # Script entry point: read the settings, generate the configuration
    # files, upload them and bring the cluster up. The settings stay at module
    # level because get_hosts and add_hosts may read user/password as globals.
    dic = read_config()
    shard_num = dic['shard_num']
    print('shard_num: %s' % shard_num)
    replica_num = dic['replica_num']
    print('replica_num: %s' % replica_num)
    user = dic['user'].strip()
    print('user: %s' % user)
    password = dic['password'].strip()
    # Never echo the real password to the terminal or to captured logs.
    print('password: %s' % '******')
    host_list = dic['host_list']
    print('host_list: %s' % host_list)
    zk_host_list = dic['zk_host_list']
    print('zk_host_list: %s' % zk_host_list)
    # remote_path is optional; push_config_file treats "" as "/etc/".
    remote_path = dic.get('remote_path', '').strip()
    print('remote_path: %s' % remote_path)

    # Every host needs exactly one (shard, replica) slot. Stop here instead
    # of generating and pushing an inconsistent configuration.
    if replica_num < 1 or shard_num * replica_num != len(host_list):
        print("host_list is invalid")
        sys.exit(-1)

    config = generate_cluster(replica_num, host_list, user, password)
    config = generate_zookeeper(zk_host_list, config)
    generate_listen_config_file()
    generate_metrika_config_file(shard_num, replica_num, host_list, config, user, password)
    push_config_file(host_list, remote_path, user, password)
    start_clickhouse_cluster(host_list, user, password)
将脚本和其配置文件放在同一路径下,并在该目录中运行脚本即可
python3 config_gen.py

该脚本`config_gen.py`配合`config.toml`文件,能灵活配置m*n的ClickHouse集群。它会生成集群配置XML,创建主机间通信的metrika.xml文件,设置允许的IP访问权限,并将配置文件推送到各节点,最后安装并启动ClickHouse服务。此过程涉及Zookeeper配置、主机名获取、SSH连接等操作。
4366

被折叠的 条评论
为什么被折叠?



