【clickhouse系列】搭建clickhouse集群仅仅需要一个脚本而已

该脚本`config_gen.py`配合`config.toml`文件,能灵活配置m*n的ClickHouse集群。它会生成集群配置XML,创建主机间通信的metrika.xml文件,设置允许的IP访问权限,并将配置文件推送到各节点,最后安装并启动ClickHouse服务。此过程涉及Zookeeper配置、主机名获取、SSH连接等操作。
摘要由CSDN通过智能技术生成

本例子可以支持灵活配置m*n的clickhouse集群

使用方法

分为执行脚本和配置文件两部分

配置文件config.toml

所有机器的登录用户名和密码必须相同(脚本使用同一组用户名/密码通过SSH登录每一台机器)

# 分片数
shard_num = 2
# 副本数
replica_num = 2
# 机器ip列表
host_list = ["10.0.0.2","10.0.0.3","10.0.0.4","10.0.0.5"]
# zk ip或域名列表
zk_host_list = ["10.0.0.6","10.0.0.7","10.0.0.8"]
# 机器用户名
user = "root"
# 机器密码
password = "xxx"
# 配置文件推送到远端的根目录(需以 / 结尾;配置为空字符串 "" 时脚本默认使用 /etc/)
remote_path = "/etc/"
脚本config_gen.py
import os,sys
import paramiko
import toml

# install pip: curl https://bootstrap.pypa.io/get-pip.py | python3
# install dependencies: pip install paramiko toml

# Skeleton of the per-host metrika.xml file. The placeholders are filled in
# by other functions in this script:
#   @cluster@            -> shard definitions (generate_cluster)
#   @zookeeper@          -> ZooKeeper node list (generate_zookeeper)
#   @shard@ / @replica@  -> per-host macros (generate_metrika_config_file)
YANDEX = """<yandex>
  <clickhouse_remote_servers>
    <default>
      @cluster@
    </default>
  </clickhouse_remote_servers>
  <zookeeper-servers>
    @zookeeper@
  </zookeeper-servers>
  <macros>
    <shard>@shard@</shard>
    <replica>@replica@</replica>
  </macros>
</yandex>"""

# Template for one shard; @servers@ is replaced with the concatenated REPLICA
# entries of that shard. The leading/trailing whitespace inside the literal
# controls the indentation of the generated XML.
SHARD = """
      <shard>
        <internal_replication>true</internal_replication>
        @servers@
      </shard>
      """

# Template for one replica of a shard; @host@ is replaced with the hostname
# of the machine. The port is fixed to 9000.
REPLICA = """<replica>
          <host>@host@</host>
          <port>9000</port>
        </replica>
        """

# Template for one ZooKeeper node; @index@ is the 1-based position of the node
# and @host@ its address. The port is fixed to 2181.
NODE = """<node index="@index@">
      <host>@host@</host>
      <port>2181</port>
    </node>
    """

# Content of listen_host.xml (written by generate_listen_config_file): listens
# on both the IPv6 ("::") and IPv4 ("0.0.0.0") wildcard addresses.
HOST = """<yandex>
  <listen_host>::</listen_host>
  <listen_host>0.0.0.0</listen_host>
  <listen_try>1</listen_try>
</yandex>
  """

def get_hostname(host, user, password):
    """Return the short hostname (`hostname -s`) of a remote machine.

    Args:
        host: Address of the machine to connect to over SSH (port 22).
        user: SSH login name.
        password: SSH password.

    Returns:
        The command output decoded to text with all newlines removed.
    """
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, 22, user, password)
        _stdin, stdout, _stderr = ssh.exec_command("hostname -s")
        return stdout.read().decode().replace("\n", "")
    finally:
        # This helper is called many times per run; always release the
        # connection instead of leaking one SSH session per call.
        ssh.close()
    
def generate_cluster(replica_num, host_list, user, password):
  """Build the metrika.xml text with the cluster (shard/replica) section filled in.

  Hosts are grouped in list order: every `replica_num` consecutive entries of
  `host_list` form one shard, and each host becomes one replica of that shard
  (identified by its short hostname, looked up over SSH).

  Args:
    replica_num: Number of replicas per shard; must be a positive integer.
    host_list: Addresses of the ClickHouse machines.
    user: SSH login name used for the hostname lookup.
    password: SSH password used for the hostname lookup.

  Returns:
    The YANDEX template with @cluster@ replaced. The @zookeeper@, @shard@ and
    @replica@ placeholders are left untouched.

  Raises:
    ValueError: If `replica_num` is not positive.
  """
  if replica_num <= 0:
    raise ValueError("replica_num must be a positive integer")

  shards = []
  # Slice the host list into consecutive groups so that every group,
  # including the final one, is always emitted as a shard. This also covers
  # replica_num == 1 and a trailing partial group.
  for start in range(0, len(host_list), replica_num):
    replicas = "".join(
      REPLICA.replace("@host@", get_hostname(host, user, password))
      for host in host_list[start:start + replica_num]
    )
    shards.append(SHARD.replace("@servers@", replicas))

  return YANDEX.replace("@cluster@", "".join(shards))

def generate_zookeeper(zk_host_list, config):
  """Fill the ZooKeeper section of the configuration text.

  Args:
    zk_host_list: Addresses (IPs or domain names) of the ZooKeeper nodes.
    config: Configuration text containing the @zookeeper@ placeholder.

  Returns:
    `config` with @zookeeper@ replaced by one NODE entry per ZooKeeper host.
    Node indexes start at 1 and follow the order of `zk_host_list`.
  """
  entries = []
  for position, zk_host in enumerate(zk_host_list, start=1):
    entry = NODE.replace("@host@", zk_host).replace("@index@", str(position))
    entries.append(entry)
  return config.replace("@zookeeper@", "".join(entries))

def generate_metrika_config_file(shard_num, replica_num, host_list, config, user, password):
  """Write one metrika.xml file per host, with that host's macros filled in.

  Hosts are consumed from `host_list` in order, `replica_num` hosts per shard.
  For every host a file named `metrika.xml-<host>` (for example
  `metrika.xml-10.0.0.2`) is written into the script directory (sys.path[0]).
  The @shard@ macro is the 0-based shard number and the @replica@ macro is the
  short hostname of the machine.

  Args:
    shard_num: Number of shards.
    replica_num: Number of replicas per shard.
    host_list: Addresses of the ClickHouse machines.
    config: Configuration text containing the @shard@ and @replica@ placeholders.
    user: SSH login name used for the hostname lookup.
    password: SSH password used for the hostname lookup.

  Raises:
    ValueError: If `host_list` has fewer than shard_num * replica_num entries.
  """
  required = shard_num * replica_num
  if required > len(host_list):
    # Fail before touching the disk rather than running out of hosts midway
    # and leaving an incomplete set of files behind.
    raise ValueError(
      "host_list has %d hosts but shard_num * replica_num = %d"
      % (len(host_list), required)
    )

  index = 0
  for shard in range(shard_num):
    for _ in range(replica_num):
      host = host_list[index]
      macros = config.replace("@shard@", str(shard))
      macros = macros.replace("@replica@", get_hostname(host, user, password))
      with open(sys.path[0] + "/metrika.xml" + "-" + host, 'w') as f:
        f.write(macros)
      index += 1

def generate_listen_config_file():
  """Write the HOST template to `listen_host.xml` in the script directory.

  The `with` block guarantees the file is closed even if the write fails.
  """
  with open(sys.path[0] + "/listen_host.xml", 'w') as f:
    f.write(HOST)

def push_config_file(host_list, remote_path, user, password):
  """Upload the generated configuration files to every host.

  For each host, `metrika.xml-<host>` from the script directory is uploaded as
  `<remote_path>metrika.xml`, and `listen_host.xml` is uploaded to
  `<remote_path>clickhouse-server/config.d/listen_host.xml`. The config.d
  directory is created first if it does not exist.

  Args:
    host_list: Addresses of the machines to push to.
    remote_path: Remote base directory; an empty string means "/etc/". A
      missing trailing slash is added automatically.
    user: SSH login name.
    password: SSH password.

  Raises:
    RuntimeError: If the remote config.d directory cannot be created.
  """
  import shlex  # local import so this block does not depend on file-level imports

  if remote_path == "":
    remote_path = "/etc/"
  elif not remote_path.endswith("/"):
    # Paths below are built by plain concatenation, so the separator must exist.
    remote_path += "/"

  # Create the directory under the same base path the upload targets, so the
  # two always agree even when remote_path is not /etc/.
  config_dir = remote_path + "clickhouse-server/config.d"
  listen_host_path = sys.path[0] + "/listen_host.xml"

  for host in host_list:
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
      ssh.connect(host, 22, user, password)

      _stdin, stdout, stderr = ssh.exec_command("mkdir -p " + shlex.quote(config_dir))
      # exec_command returns immediately; wait for mkdir to finish so the
      # directory is guaranteed to exist before uploading into it.
      if stdout.channel.recv_exit_status() != 0:
        raise RuntimeError(
          "host " + host + ": cannot create " + config_dir + ": " + stderr.read().decode()
        )

      sftp = ssh.open_sftp()
      try:
        metrika_path = sys.path[0] + "/metrika.xml-" + host
        sftp.put(metrika_path, remote_path + "metrika.xml")
        sftp.put(listen_host_path, config_dir + "/listen_host.xml")
      finally:
        sftp.close()
    finally:
      ssh.close()

def install_and_start_clickhouse(host, user, password):
  """Install ClickHouse with yum on one host and (re)start the server.

  Runs three chained shell commands over SSH: register the Altinity package
  repository, `yum install` the server and client, then restart the
  clickhouse-server init script. The result is reported on stdout.

  Args:
    host: Address of the machine to install on.
    user: SSH login name.
    password: SSH password.
  """
  cmd = "{} && {} && {}".format("curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | bash",
                                "yum install -y clickhouse-server clickhouse-client",
                                "/etc/init.d/clickhouse-server restart")
  ssh = paramiko.SSHClient()
  ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
  try:
    ssh.connect(host, 22, user, password)
    _stdin, stdout, stderr = ssh.exec_command(cmd)
    # Drain stdout first: the installer's output goes there, and leaving it
    # unread while blocking on stderr risks stalling the remote command.
    stdout.read()
    errmsg = stderr.read().decode()
    exit_status = stdout.channel.recv_exit_status()
  finally:
    ssh.close()

  # Judge success by the exit status of the `&&` chain. The tools involved may
  # write warnings to stderr even when they succeed, so non-empty stderr alone
  # is not a reliable failure signal.
  if exit_status != 0:
    print("host " + host + " install failed (exit status " + str(exit_status) + "): " + errmsg)
  else:
    print("host " + host + " install finished and started")

def _legacy_credentials(user, password):
  """Fill in missing credentials from the module-level `user`/`password` globals.

  Earlier versions of get_hosts/add_hosts read these globals implicitly; the
  fallback keeps calls without explicit credentials working.
  """
  if user is None:
    user = globals()["user"]
  if password is None:
    password = globals()["password"]
  return user, password


def get_hosts(host_list, user=None, password=None):
  """Collect the "<ip> <hostname>" lines to append to /etc/hosts.

  Args:
    host_list: Addresses of the cluster machines.
    user: SSH login name; defaults to the module-level `user`.
    password: SSH password; defaults to the module-level `password`.

  Returns:
    One newline-terminated "<ip> <hostname>" line per host, concatenated in
    the order of `host_list`.
  """
  user, password = _legacy_credentials(user, password)
  lines = []
  for host in host_list:
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
      ssh.connect(host, 22, user, password)
      _stdin, stdout, _stderr = ssh.exec_command("hostname -s")
      hostname = stdout.read().decode().replace("\n", "")
      _stdin, stdout, _stderr = ssh.exec_command("ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d \"addr:\"")
      addresses = stdout.read().decode().split()
    finally:
      ssh.close()

    # A machine with several interfaces reports several addresses; joining
    # them would produce a corrupt entry. Prefer the address we connected to,
    # otherwise the first reported one. If nothing is reported at all, fall
    # back to the configured address.
    if not addresses or host in addresses:
      ip = host
    else:
      ip = addresses[0]
    lines.append(ip + " " + hostname + "\n")
  return "".join(lines)


def add_hosts(host, host_list_str, user=None, password=None):
  """Append `host_list_str` to /etc/hosts on one machine.

  Args:
    host: Address of the machine to modify.
    host_list_str: Text to append (lines of "<ip> <hostname>").
    user: SSH login name; defaults to the module-level `user`.
    password: SSH password; defaults to the module-level `password`.
  """
  user, password = _legacy_credentials(user, password)
  add_host_str = 'echo "{}" >> /etc/hosts'.format(host_list_str)
  ssh = paramiko.SSHClient()
  ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
  try:
    ssh.connect(host, 22, user, password)
    _stdin, stdout, _stderr = ssh.exec_command(add_host_str)
    # exec_command does not wait for the command; block until it has finished
    # so the connection is not torn down while the append is still running.
    stdout.channel.recv_exit_status()
  finally:
    ssh.close()


def start_clickhouse_cluster(host_list, user, password):
  """Register host names on every machine, then install and start ClickHouse.

  Each machine gets a "127.0.0.1 <own hostname>" entry followed by the
  "<ip> <hostname>" entries of all cluster machines appended to /etc/hosts.

  Args:
    host_list: Addresses of the cluster machines.
    user: SSH login name.
    password: SSH password.
  """
  host_list_str = get_hosts(host_list, user, password)
  for host in host_list:
    hostname = get_hostname(host, user, password)
    add_hosts(host, "127.0.0.1 " + hostname + "\n" + host_list_str, user, password)
    install_and_start_clickhouse(host, user, password)

def read_config():
  """Load `config.toml` from the current working directory.

  The file is read as bytes, a leading UTF-8 byte-order mark is dropped if
  present, and the rest is parsed as UTF-8 encoded TOML.

  Returns:
    The parsed configuration as a dict.

  If the file does not exist, a "not found" prompt is shown (waiting for the
  user to press Enter) and the process exits with status -1.
  """
  utf8_bom = b'\xef\xbb\xbf'
  path = 'config.toml'

  if os.path.exists(path):
    with open(path, mode='rb') as handle:
      raw = handle.read()
    if raw.startswith(utf8_bom):
      raw = raw[len(utf8_bom):]
    return toml.loads(raw.decode('utf8'))

  input(path + ' not found')
  sys.exit(-1)


if __name__ == "__main__":
  # Entry point: read config.toml, generate the XML files, push them to every
  # host, then install and start ClickHouse on each of them.
  # NOTE: `user` and `password` are intentionally kept as module-level names;
  # other functions in this file may read them as globals.
  dic = read_config()

  shard_num = dic['shard_num']
  print('shard_num: %s' % shard_num)

  replica_num = dic['replica_num']
  print('replica_num: %s' % replica_num)

  user = dic['user'].strip()
  print('user: %s' % user)

  password = dic['password'].strip()
  # Never echo the real password to the console or captured logs.
  print('password: ******')

  host_list = dic['host_list']
  print('host_list: %s' % host_list)

  zk_host_list = dic['zk_host_list']
  print('zk_host_list: %s' % zk_host_list)

  # remote_path is optional: when absent or empty, push_config_file falls
  # back to "/etc/".
  remote_path = dic.get('remote_path', '').strip()
  print('remote_path: %s' % remote_path)

  # The topology must match the host list exactly (shards * replicas hosts);
  # stop here instead of generating and pushing an inconsistent configuration.
  if shard_num <= 0 or replica_num <= 0 or shard_num * replica_num != len(host_list):
    print("host_list is invalid")
    sys.exit(-1)

  config = generate_cluster(replica_num, host_list, user, password)
  config = generate_zookeeper(zk_host_list, config)

  generate_listen_config_file()

  generate_metrika_config_file(shard_num, replica_num, host_list, config, user, password)

  push_config_file(host_list, remote_path, user, password)

  start_clickhouse_cluster(host_list, user, password)

将脚本和其配置文件放在同一路径下,运行脚本即可

python3 config_gen.py
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

一只努力的微服务

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值