#!/usr/bin/python3
# encoding: utf-8
#filename: tidb-maintenance.py
#author: gaohaixiang
#writetime:202403041043
import subprocess
import sys
import time
"""
# 脚本使用示例
# 集群状态查询并重启失败的服务
python3 tidb-maintenance.py checkServiceForTidb
# 集群的具体操作
# 启动 tidb 集群
python3 tidb-maintenance.py operateTidbCluster tidbStartCluster
# 查看 tidb 集群状态
python3 tidb-maintenance.py operateTidbCluster tidbStatusCluster
# 重启 tidb 集群的某个服务,grafana,pd,prometheus,tidb,tikv
python3 tidb-maintenance.py operateTidbCluster tidbRestartSomeServiceCluster grafana
# 重启 tidb 集群
python3 tidb-maintenance.py operateTidbCluster tidbRestartCluster
# 停止 tidb 集群
python3 tidb-maintenance.py operateTidbCluster tidbStopCluster
# 备注
# 请注意 tidb 服务所在的 ip 段:在检测集群服务状态时需要用到该 ip 段,请按实际情况修改 checkServiceForTidb 中使用的 ip 前缀
lines.startswith("172.31.")
# tidb集群操作示例
# 启动集群:
/root/.tiup/bin/tiup cluster start tidb-cluster
# 查看集群状态:
/root/.tiup/bin/tiup cluster display tidb-cluster
# 手动重启某个服务
/root/.tiup/bin/tiup cluster start tidb-cluster -R grafana
/root/.tiup/bin/tiup cluster start tidb-cluster -R tidb
# 重启集群
/root/.tiup/bin/tiup cluster restart tidb-cluster
# 停止集群
/root/.tiup/bin/tiup cluster stop tidb-cluster
"""
def tidbStartCluster(getdatetime, filewrite):
    """Start the TiDB cluster and record the command output in the log.

    The log header (separator plus timestamp) is written first through
    ``fileWriteLine``; then ``tiup cluster start tidb-cluster`` is run and
    its output is appended under a ``tidbStartCluster`` label.

    Args:
        getdatetime: Timestamp text placed in the log header.
        filewrite: Open, writable text file object used as the log.

    Returns:
        The text produced by the command, as given by
        ``subprocess.getoutput``.
    """
    fileWriteLine(getdatetime, filewrite)
    command = "/root/.tiup/bin/tiup cluster start tidb-cluster"
    result = subprocess.getoutput(command)
    # Label first, then the raw command output, exactly in that order.
    filewrite.write("\ntidbStartCluster\n")
    filewrite.write(result)
    return result
def tidbStatusCluster(getdatetime, filewrite):
    """Query the TiDB cluster status and record it in the log.

    Writes the log header through ``fileWriteLine``, runs
    ``tiup cluster display tidb-cluster`` and appends the output under a
    ``tidbStatusCluster`` label.

    Args:
        getdatetime: Timestamp text placed in the log header.
        filewrite: Open, writable text file object used as the log.

    Returns:
        The text produced by the display command; callers parse or print it.
    """
    fileWriteLine(getdatetime, filewrite)
    display_text = subprocess.getoutput(
        "/root/.tiup/bin/tiup cluster display tidb-cluster"
    )
    for chunk in ("\ntidbStatusCluster\n", display_text):
        filewrite.write(chunk)
    return display_text
def tidbRestartSomeServiceCluster(getdatetime, someService, filewrite):
    """Bring up one role of the TiDB cluster and record the output in the log.

    Note: despite the function name, the command issued is
    ``tiup cluster start tidb-cluster -R <role>`` (a start, not a restart),
    which is what this script has always run.

    The command is executed without a shell and with the role passed as its
    own argument, so characters such as ``;`` or ``$(...)`` in the role name
    are never interpreted as shell syntax.

    Args:
        getdatetime: Timestamp text placed in the log header.
        someService: Role name handed to ``-R`` (the usage notes list
            grafana, pd, prometheus, tidb and tikv).
        filewrite: Open, writable text file object used as the log.

    Returns:
        The combined stdout/stderr text of the command with one trailing
        newline removed, or the OS error text if the command could not be
        launched at all.
    """
    fileWriteLine(getdatetime, filewrite)
    command = [
        "/root/.tiup/bin/tiup", "cluster", "start", "tidb-cluster",
        "-R", str(someService),
    ]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        output = completed.stdout
    except OSError as error:
        # The shell-based version reported a missing binary as text instead
        # of raising; keep that contract so callers and the log still work.
        output = str(error)
    # Match the shape of subprocess.getoutput, which drops one final newline.
    if output.endswith("\n"):
        output = output[:-1]
    filewrite.write("\ntidbRestartSomeServiceCluster\n")
    filewrite.write("重启 %s 服务\n" % someService)
    filewrite.write(output)
    return output
def tidbRestartCluster(getdatetime, filewrite):
    """Restart the whole TiDB cluster and record the command output.

    Writes the log header through ``fileWriteLine``, runs
    ``tiup cluster restart tidb-cluster`` and appends the output under a
    ``tidbRestartCluster`` label.

    Args:
        getdatetime: Timestamp text placed in the log header.
        filewrite: Open, writable text file object used as the log.

    Returns:
        The text produced by the restart command.
    """
    fileWriteLine(getdatetime, filewrite)
    restart_command = "/root/.tiup/bin/tiup cluster restart tidb-cluster"
    restart_text = subprocess.getoutput(restart_command)
    filewrite.write("\ntidbRestartCluster\n")
    filewrite.write(restart_text)
    return restart_text
def tidbStopCluster(getdatetime, filewrite):
    """Stop the whole TiDB cluster and record the command output.

    Writes the log header through ``fileWriteLine``, runs
    ``tiup cluster stop tidb-cluster`` and appends the output under a
    ``tidbStopCluster`` label.

    Args:
        getdatetime: Timestamp text placed in the log header.
        filewrite: Open, writable text file object used as the log.

    Returns:
        The text produced by the stop command.
    """
    fileWriteLine(getdatetime, filewrite)
    stop_text = subprocess.getoutput(
        "/root/.tiup/bin/tiup cluster stop tidb-cluster"
    )
    for chunk in ("\ntidbStopCluster\n", stop_text):
        filewrite.write(chunk)
    return stop_text
def checkServiceForTidb(getdatetime, filewrite, ipPrefix="172.31."):
    """Check every service of the cluster and start the roles that are not up.

    The status text comes from ``tidbStatusCluster``. Only rows that begin
    with ``ipPrefix`` are treated as service rows; on such a row the second
    whitespace-separated field is used as the service (role) name and the
    sixth as its status.

    A status is considered healthy when its first ``|``-separated part is
    ``Up``. This covers plain ``Up`` as well as flagged forms such as
    ``Up|L``, ``Up|UI`` and ``Up|L|UI``; the earlier exact-match test
    accepted only ``Up`` and ``Up|L|UI`` and therefore restarted nodes that
    were merely flagged differently.
    NOTE(review): the set of flag suffixes depends on the tiup version --
    confirm against the deployed ``tiup cluster display`` output.

    Args:
        getdatetime: Timestamp text placed in the log headers.
        filewrite: Open, writable text file object used as the log.
        ipPrefix: Leading text that identifies a service row; must match the
            IP range the cluster is deployed on.
    """
    statusoutput = tidbStatusCluster(getdatetime, filewrite)
    # ``-R role`` acts on the role as a whole, so one call per role is enough
    # even when several instances of that role are down.
    handledroles = set()
    for row in statusoutput.split("\n"):
        if not row.startswith(ipPrefix):
            continue
        columns = row.split()
        if len(columns) < 6:
            # Truncated or unexpected row: there is no status field to read.
            continue
        serviceName = columns[1]
        serviceStatus = columns[5]
        if serviceStatus.split("|")[0] == "Up":
            print(serviceName, serviceStatus)
        elif serviceName not in handledroles:
            handledroles.add(serviceName)
            tidbRestartSomeServiceCluster(getdatetime, serviceName, filewrite)
def fileWriteLine(getdatetime, filewrite):
    """Write the header that opens a new log entry.

    The header is a separator line (preceded and followed by a newline)
    and then the timestamp text, with no newline after the timestamp.

    Args:
        getdatetime: Timestamp text to write after the separator.
        filewrite: Open, writable text file object used as the log.
    """
    separator = "\n-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
    filewrite.write(separator)
    filewrite.writelines(getdatetime)
def timestamp_time():
    """Return the current local date and time as a compact string.

    Returns:
        A 14-digit string in ``%Y%m%d%H%M%S`` form, e.g. ``20240304104300``.
    """
    # Whole seconds only; the format below has no sub-second field.
    now_seconds = int(time.time())
    return time.strftime("%Y%m%d%H%M%S", time.localtime(now_seconds))
def main(getdatetime, filewrite):
    """Dispatch the command-line request to the matching cluster operation.

    Reads ``sys.argv``:

    * ``checkServiceForTidb`` -- check all services and start failed ones.
    * ``operateTidbCluster <operation> [service]`` -- run one of
      ``tidbStartCluster``, ``tidbStatusCluster`` (also printed to stdout),
      ``tidbRestartSomeServiceCluster <service>``, ``tidbRestartCluster``
      or ``tidbStopCluster``.

    Missing or unknown arguments print a hint instead of raising
    ``IndexError``.

    Args:
        getdatetime: Timestamp text placed in the log headers.
        filewrite: Open, writable text file object used as the log.
    """
    arguments = sys.argv[1:]
    action = arguments[0] if arguments else None

    # Health check: inspect the cluster and start the roles that are not up.
    if action == "checkServiceForTidb":
        checkServiceForTidb(getdatetime, filewrite)
        return
    if action != "operateTidbCluster":
        print("请输入集群的正确操作类型")
        return

    # Explicit cluster operation selected by the second argument.
    operation = arguments[1] if len(arguments) > 1 else None
    if operation == "tidbStartCluster":
        tidbStartCluster(getdatetime, filewrite)
    elif operation == "tidbStatusCluster":
        print(tidbStatusCluster(getdatetime, filewrite))
    elif operation == "tidbRestartSomeServiceCluster":
        if len(arguments) > 2:
            tidbRestartSomeServiceCluster(getdatetime, arguments[2], filewrite)
        else:
            # The service name is mandatory for this operation.
            print("请输入正确的操作参数。。。。")
    elif operation == "tidbRestartCluster":
        tidbRestartCluster(getdatetime, filewrite)
    elif operation == "tidbStopCluster":
        tidbStopCluster(getdatetime, filewrite)
    else:
        print("请输入正确的操作参数。。。。")
if __name__ == '__main__':
    # Directory that holds the script and its log file.
    logdir = "/data/processlog/"
    # Log file name.
    tidblog = "tidblog.log"
    tidbfile = logdir + tidblog
    # One timestamp per run, shared by every log header written below.
    getdatetime = timestamp_time()
    # The context manager closes (and flushes) the log even when main()
    # raises, which the explicit open()/close() pair did not guarantee.
    with open(tidbfile, "a+", encoding="UTF8") as filewrite:
        # Entry point.
        main(getdatetime, filewrite)
# tidb集群操作
# 于 2024-03-04 17:35:39 首次发布