Clickhouse定时归档脚本
clickhouse官方文档: https://clickhouse.com/docs
1. Clickhouse功能脚本
#!/bin/bash
PARTITION_SUPPORT_OPERATE=("DETACH" "DROP" "DROP DETACHED" "ATTACH" "FREEZE" "UNFREEZE")
ck_part_ctl() {
operate=$(echo "$1" | tr '[:lower:]' '[:upper:]')
database=$2
table=$3
partition=$4
# 检查参数是否为空
if [ -z "$operate" ] || [ -z "$database" ] || [ -z "$table" ] || [ -z "$partition" ]; then
echo "Error: One or more of the required parameters (operate,database, table, partition) are empty."
return 400
fi
# 检查操作类型是否正确
operate_valid=0
for item in "${PARTITION_SUPPORT_OPERATE[@]}"; do
if [ "$item" == "$operate" ]; then
operate_valid=1
break
fi
done
if [ "$operate_valid" == "0" ]; then
echo "Error: operate is not valid, suppprt operate are : ${PARTITION_SUPPORT_OPERATE[@]}."
return 400
fi
# 执行ClickHouse命令
clickhouse-client --query="ALTER TABLE ${database}.${table} ${operate} PARTITION '${partition}'"
# 检查ClickHouse命令的返回值
if [ $? -ne 0 ]; then
echo "Error: Failed to ${operate} partition '${partition}' to table '${database}.${table}'."
return 1
fi
echo "Partition '${partition}' ${operate} successfully to table '${database}.${table}'."
}
ck_backup() {
database=$1
table=$2
partition=$3
partition_file_prefix=$(echo "$partition" | tr -d '-')
# 备份服务器
remote_host=$4
remote_user=$5
if [ -z "$remote_host" ]; then
remote_host=$GLOBAL_REMOTE_HOST
fi
if [ -z "$remote_user" ]; then
remote_user=$GLOBAL_REMOTE_USER
fi
# 检查参数是否为空
if [ -z "$database" ] || [ -z "$table" ] || [ -z "$partition" ] || [ -z "$remote_host" ] || [ -z "$remote_user" ]; then
echo "Error: One or more of the required parameters (database, table, partition, remote_host, remote_user) are empty."
return 400
fi
# ck home
clickhouse_home=$GLOBAL_CLICKHOUSE_HOME
if [ -z "$clickhouse_home" ]; then
echo "Error: GLOBAL_CLICKHOUSE_HOME env variable is required."
return 400
fi
# 远程服务是否可连接
remote_backup_path=$GLOBAL_BACKUP_PATH
if [ -z "$remote_backup_path" ]; then
echo "Error: GLOBAL_BACKUP_PATH env variable is required."
return 400
fi
remote_backup_path=$remote_backup_path/${database}/${table}/$partition_file_prefix
ssh -o ConnectTimeout=5 $remote_user@$remote_host "exit"
if [ $? -ne 0 ]; then
echo "Error: Can't connect remote server $remote_host with user $remote_user."
return 502
fi
# 执行ck detach命令
ck_part_ctl "DETACH" "$database" "$table" "$partition"
if [ $? -ne 0 ]; then
return 1
fi
# 检查是否有备份数据
back_dir_cnt=$(find $clickhouse_home/data/${database}/${table}/detached/ -maxdepth 1 -name "$partition_file_prefix"* | wc -l)
if [ $back_dir_cnt -eq 0 ] ; then
echo "There is a empty data collection in ${database}.${table} with partition $partition."
return 0
fi
# 临时工作空间
work_space_dir=${GLOBAL_WORK_SPACE_DIR:-/tmp}
work_space=$work_space_dir/backup/$database/$table/$partition_file_prefix
rm -rf "$work_space" && mkdir -p "$work_space" && cd "$work_space" || {
echo "Failed to create or enter work space directory."
return 1
}
# 数据压缩
cp -r $clickhouse_home/data/${database}/${table}/detached/$partition_file_prefix* .
back_tar_file=$partition_file_prefix-$(date '+%s').tar.gz
tar czf $back_tar_file $partition_file_prefix*
cd -
# 数据传输
ssh -o ConnectTimeout=5 $remote_user@$remote_host "mkdir -p $remote_backup_path"
scp -o ConnectTimeout=5 $work_space/$back_tar_file $remote_user@$remote_host:$remote_backup_path/$back_tar_file
if [ $? -ne 0 ]; then
echo "Error: scp $work_space/$back_tar_file to remote server $remote_user@$remote_host:$remote_backup_path/$back_tar_file , failed"
rm -rf $work_space
ck_part_ctl "ATTACH" "$database" "$table" "$partition"
return 502
fi
# 删除数据
rm -rf $work_space && rm -rf $clickhouse_home/data/${database}/${table}/detached/$partition_file_prefix*
echo "Backup ${database}.${table} with partition '${partition}' to remote server $remote_user@$remote_host success."
}
ck_range_backup() {
database=$1
table=$2
partition_begin=$3
partition_end=$4
# 检查参数是否为空
if [ -z "$database" ] || [ -z "$table" ] || [ -z "$partition_begin" ] || [ -z "$partition_end" ]; then
echo "Error: One or more of the required parameters (operate,database, table, partition_begin, partition_end) are empty."
return 400
fi
# 查询所有partition
partition_list=$(clickhouse-client --query="select distinct partition from system.parts where database = '${database}' and table = '${table}'
and partition between '${partition_begin}' and '${partition_end}' order by partition")
partition_arr=($partition_list)
for partition in "${partition_arr[@]}"; do
ck_backup "${database=$1}" "${table}" "${partition}"
done
}
ck_restore() {
database=$1
table=$2
partition=$3
partition_file_prefix=$(echo "$partition" | tr -d '-')
# 备份服务器
remote_host=$4
remote_user=$5
if [ -z "$remote_host" ]; then
remote_host=$GLOBAL_REMOTE_HOST
fi
if [ -z "$remote_user" ]; then
remote_user=$GLOBAL_REMOTE_USER
fi
# 检查参数是否为空
if [ -z "$database" ] || [ -z "$table" ] || [ -z "$partition" ] || [ -z "$remote_host" ] || [ -z "$remote_user" ]; then
echo "Error: One or more of the required parameters (database, table, partition, remote_host, remote_user) are empty."
return 400
fi
# ck home
clickhouse_home=$GLOBAL_CLICKHOUSE_HOME
if [ -z "$clickhouse_home" ]; then
echo "Error: GLOBAL_CLICKHOUSE_HOME env variable is required."
return 400
fi
# 远程服务是否可连接
remote_backup_path=$GLOBAL_BACKUP_PATH
if [ -z "$remote_backup_path" ]; then
echo "Error: GLOBAL_BACKUP_PATH env variable is required."
return 400
fi
remote_backup_path=$remote_backup_path/${database}/${table}/$partition_file_prefix
ssh -o ConnectTimeout=5 $remote_user@$remote_host "exit"
if [ $? -ne 0 ]; then
echo "Error: Can't connect remote server $remote_host with user $remote_user."
return 502
fi
# 临时工作空间
work_space_dir=${GLOBAL_WORK_SPACE_DIR:-/tmp}
work_space=$work_space_dir/backup/$database/$table/$partition_file_prefix
rm -rf "$work_space" && mkdir -p "$work_space" && cd "$work_space" || {
echo "Failed to create or enter work space directory."
return 1
}
scp -o ConnectTimeout=5 $remote_user@$remote_host:$remote_backup_path/*.gz . >/dev/null 2>&1 || true
# 检查是否有备份数据
back_tar_cnt=$(find . -maxdepth 1 -name '*.gz' | wc -l)
if [ $back_tar_cnt -eq 0 ] ; then
echo "There is a empty backup in ${database}.${table} with partition $partition."
return 0
fi
# 解压
for tarFile in $work_space/*; do
tar xf $tarFile -C $clickhouse_home/data/${database}/${table}/detached/
done
cd -- >/dev/null && rm -rf $work_space
chown -R clickhouse:clickhouse $clickhouse_home/data/${database}/${table}/detached/
chmod -R 755 $clickhouse_home/data/${database}/${table}/detached/
# 执行ck attach命令
ck_part_ctl "ATTACH" "$database" "$table" "$partition"
if [ $? -ne 0 ]; then
return 1
fi
echo "Restore ${database}.${table} with partition '${partition}' from remote server $remote_user@$remote_host success."
}
脚本功能:
ck_part_ctl:支持对partition进行DETACH、DROP、FREEZE等操作
参数1: 操作,支持PARTITION_SUPPORT_OPERATE数组中的操作类型,大小写不敏感
参数2: 数据库名
参数3: 表名
参数4: 表分片,大部分日期业务分片如:1970-01-01,也有系统表按月分片202001.
ck_backup: 备份clickhouse数据到远程服务器
参数1: 数据库名
参数2: 表名
参数3: 表分片,大部分日期业务分片如:1970-01-01,也有系统表按月分片202001.
参数4: 远程服务器host(可选,如果不传,读取GLOBAL_REMOTE_HOST)
参数5: 远程服务器user(可选,如果不传,读取GLOBAL_REMOTE_USER)
依赖环境变量GLOBAL_CLICKHOUSE_HOME:clickhouse服务磁盘地址,例如/data/clickhouse
依赖环境变量GLOBAL_BACKUP_PATH:远程服务器的备份地址,列如/data/clickhouse/backup
依赖环境变量 GLOBAL_WORK_SPACE_DIR:临时操作目录需提前创建,例如/data/clickhouse/tmp(可选,默认/tmp)
ck_range_backup: 依赖ck_backup方法,传入一个partition范围备份
参数1: 数据库名
参数2: 表名
参数3: 分片范围最小值
参数4: 分片范围最大值
依赖环境变量GLOBAL_REMOTE_HOST: 远程服务地址host
依赖环境变量GLOBAL_REMOTE_USER: 远程服务地址用户
依赖环境变量GLOBAL_CLICKHOUSE_HOME:clickhouse服务磁盘地址,例如/data/clickhouse
依赖环境变量GLOBAL_BACKUP_PATH:远程服务器的备份地址,列如/data/clickhouse/backup
依赖环境变量 GLOBAL_WORK_SPACE_DIR:临时操作目录需提前创建,例如/data/clickhouse/tmp(可选,默认/tmp)
ck_restore: 备份还原,从远程备份服务器拉取数据存于本地clickhouse
参数1: 数据库名
参数2: 表名
参数3: 表分片,大部分日期业务分片如:1970-01-01,也有系统表按月分片202001.
参数4: 远程服务器host(可选,如果不传,读取GLOBAL_REMOTE_HOST)
参数5: 远程服务器user(可选,如果不传,读取GLOBAL_REMOTE_USER)
依赖环境变量GLOBAL_CLICKHOUSE_HOME:clickhouse服务磁盘地址,例如/data/clickhouse
依赖环境变量GLOBAL_BACKUP_PATH:远程服务器的备份地址,列如/data/clickhouse/backup
依赖环境变量 GLOBAL_WORK_SPACE_DIR:临时操作目录需提前创建,例如/data/clickhouse/tmp(可选,默认/tmp)
2 定时归档任务脚本
#!/bin/bash
export GLOBAL_REMOTE_HOST=192.160.1.1
export GLOBAL_REMOTE_USER=root
export GLOBAL_CLICKHOUSE_HOME=/data/clickhouse
export GLOBAL_BACKUP_PATH=/data/clickhouse/backup
export GLOBAL_WORK_SPACE_DIR=/data/clickhouse/tmpworkspace
base_dir=$(dirname "$0")
source $base_dir/clickhousectl.sh
# 备份日期,当前时间减1年零1天
archive_date=$(date -d "1 year ago 1 day ago" "+%Y-%m-%d")
archive_tables=(table1 table2 table3 table4)
archive_db=test_db
for archive_table in "${archive_tables[@]}"; do
ck_backup "$archive_db" "$archive_table" "$archive_date"
done
加入定时任务
crontab -e
23 * * * * /data/clickhouse/script/ck_archive.sh >> /data/clickhouse/script/archive.log