归档数据库(带时间年限)
#!/bin/bash
############################## User-editable settings ##############################
# Source table and archive (backup) table; both use the same name by default.
SOURCE_TABLE="表名"
BACKUP_TABLE="表名"
# Archive cut-off, written as a SQL expression: rows whose DATE_FIELD is earlier
# than this value are the ones selected for archiving.
ARCHIVE_TIME_LIMIT='DATE_SUB(CURDATE(), INTERVAL 1 YEAR )'
# Name of the date/time column.
DATE_FIELD="时间字段名"
# ID or otherwise unique column, used for ordering when timestamps can repeat.
# Leave it empty to fall back to DATE_FIELD.
ID_FIELD="ID字段或唯一字段"
# Ordering column: ID_FIELD when it is set, DATE_FIELD otherwise.
FIELD_NAME=${ID_FIELD:-$DATE_FIELD}
echo "FIELD_NAME参数为:$FIELD_NAME"
############################## Database connections ################################
# Connection settings of the archive destination.
# (No trailing blank inside the value: it would become part of the host name as
# soon as the variable is expanded inside quotes.)
DB_HOST="数据库连接地址"
DB_USER="用户名"
DB_PASS="密码"
DB_NAME="库名"
PORT="端口"
# Connection settings of the archive source.
DB_HOST1="数据库连接地址"
DB_USER1="用户名"
DB_PASS1="密码"
DB_NAME1="库名"
PORT1="端口"
############################## End of settings #####################################
# Today's date; the rest of the script uses it in the dump file name and in log
# messages.
TODAY_DATE=$(date +%Y-%m-%d)
# Newest DATE_FIELD value currently in the source table. It caps the before/after
# row counts so rows inserted while the script runs do not skew the comparison.
TBL_NEW_TIME=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT $DATE_FIELD FROM $DB_NAME1.$SOURCE_TABLE order by $DATE_FIELD DESC limit 1")
# Row count of the source table before archiving (capped by TBL_NEW_TIME).
START_TBL_TOTAL_LINE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD <= '$TBL_NEW_TIME'")
# This number is only consumed after rows have been deleted, so refuse to start
# without a valid value instead of discovering the problem afterwards.
if ! [[ "$START_TBL_TOTAL_LINE" =~ ^[0-9]+$ ]]; then
  echo "【ERROR】获取归档前行数失败!"
  exit 1
fi
# Oldest and newest DATE_FIELD values among the rows older than ARCHIVE_TIME_LIMIT.
MIN_DATE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT MIN($DATE_FIELD) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD < $ARCHIVE_TIME_LIMIT")
MAX_DATE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT MAX($DATE_FIELD) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD < $ARCHIVE_TIME_LIMIT")
echo "$MIN_DATE"
echo "$MAX_DATE"
# Empty output means the query itself failed.
if [[ -z "$MIN_DATE" || -z "$MAX_DATE" ]]; then
  echo "【ERROR】获取归档日期范围失败!"
  exit 1
fi
# The mysql client prints SQL NULL as the text "NULL": no row matches the
# cut-off, so there is nothing to archive.
if [[ "$MIN_DATE" == "NULL" || "$MAX_DATE" == "NULL" ]]; then
  echo "没有需要归档的数据,脚本退出"
  exit 0
fi
# Calendar day of MIN_DATE; the copy loop starts from this day.
if ! CURRENT_DATE=$(date -d "$MIN_DATE" +%Y-%m-%d); then
  echo "【ERROR】无法解析最小日期:$MIN_DATE"
  exit 1
fi
# Copy the archive range into the backup table one calendar day at a time, in
# batches of BATCH_SIZE rows.
BATCH_SIZE=100000
OFFSET=0
# Without a start day and an upper bound the loop below cannot be bounded.
if [[ -z "$CURRENT_DATE" || -z "$MAX_DATE" ]]; then
  echo "【ERROR】归档日期范围为空,无法执行数据插入"
  exit 1
fi
# "not greater than" rather than "<": the last day is still copied when MAX_DATE
# is a bare date without a time part.
while [[ ! "$CURRENT_DATE" > "$MAX_DATE" ]]; do
  if ! NEXT_DATE=$(date -d "$CURRENT_DATE +1 days" +%Y-%m-%d); then
    echo "【ERROR】无法计算下一天日期:$CURRENT_DATE"
    exit 1
  fi
  # Rows of the current day that also fall inside [MIN_DATE, MAX_DATE].
  DAY_FILTER="$DATE_FIELD >= '$CURRENT_DATE' AND $DATE_FIELD < '$NEXT_DATE' AND $DATE_FIELD >= '$MIN_DATE' AND $DATE_FIELD <= '$MAX_DATE'"
  # Number of source rows for this day; it bounds the batch loop, which
  # otherwise has no way to know when the day is exhausted.
  DAY_ROWS=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE WHERE $DAY_FILTER")
  if ! [[ "$DAY_ROWS" =~ ^[0-9]+$ ]]; then
    echo "【ERROR】获取 $CURRENT_DATE 源数据行数失败"
    exit 1
  fi
  OFFSET=0
  while (( OFFSET < DAY_ROWS )); do
    # The source is read through jdbc_catalog on the destination connection
    # (presumably an external catalog configured there; verify on the server).
    # ORDER BY keeps LIMIT/OFFSET pages disjoint; without a defined order
    # consecutive batches may repeat or skip rows.
    if ! mysql -h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME -e "
INSERT INTO $BACKUP_TABLE
SELECT * FROM jdbc_catalog.$DB_NAME1.$SOURCE_TABLE
WHERE $DAY_FILTER
ORDER BY ${FIELD_NAME:-$DATE_FIELD}
LIMIT $BATCH_SIZE OFFSET $OFFSET;
"; then
      echo "【ERROR】$CURRENT_DATE 数据插入失败(OFFSET=$OFFSET)"
      exit 1
    fi
    OFFSET=$((OFFSET + BATCH_SIZE))
  done
  # Move on to the next day.
  CURRENT_DATE=$NEXT_DATE
done
echo "数据插入完成。"
# Row counts over the archived range [MIN_DATE, MAX_DATE]: SO_COUNT is read from
# the archive table, DE_COUNT from the source table.
range_clause="where $DATE_FIELD >= '$MIN_DATE' and $DATE_FIELD <= '$MAX_DATE' "
# Left unquoted on purpose so the connection values are word-split exactly as
# they are when written inline on the command.
archive_conn=(-h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME)
origin_conn=(-h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1)
SO_COUNT=$(mysql "${archive_conn[@]}" -se "SELECT count(1) FROM $DB_NAME.$BACKUP_TABLE $range_clause")
DE_COUNT=$(mysql "${origin_conn[@]}" -se "SELECT count(1) FROM $DB_NAME1.$SOURCE_TABLE $range_clause")
printf '%s\n' "Count from SO_COUNT: $SO_COUNT" "Count from DE_COUNT: $DE_COUNT"
# A mismatch (or a value that is not a number) stops the run here, before the
# later delete step is reached.
if ! [ "$SO_COUNT" -eq "$DE_COUNT" ]; then
  echo "【ERROR】归档数据总行数对比不一致"
  exit 1
fi
echo "归档数据总行数对比一致"
# Spot check: fetch one row from the archive and the matching row from the
# source, then compare their MD5 digests.
# Sampling window: the 90 days that end at the calendar day of MAX_DATE.
DATA_DATE1=$(date -d "$MAX_DATE" +%Y-%m-%d)
DATA_DATE2=$(date -d "$DATA_DATE1 -90 days" +%Y-%m-%d)
# Shared tail of both queries: the 10th row of the window in descending
# FIELD_NAME order.
sample_tail="where $DATE_FIELD < '$DATA_DATE1' and $DATE_FIELD >= '$DATA_DATE2' order by $FIELD_NAME DESC LIMIT 1 OFFSET 9"
sr_data=$(mysql -h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME -se "SELECT * FROM $DB_NAME.$BACKUP_TABLE $sample_tail")
tx_data=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT * FROM $DB_NAME1.$SOURCE_TABLE $sample_tail")
echo "随机获取数据的最大时间范围为$DATA_DATE1"
echo "随机获取数据的最小时间范围为$DATA_DATE2"
# Both samples must be present before they can be compared.
if [ -n "$sr_data" ] && [ -n "$tx_data" ]; then
  echo "获取第10行归档数据为:$sr_data"
  echo "获取第10行源数据为:$tx_data"
else
  echo "【ERROR】获取第10行源数据/归档数据时,发现数据为空" && exit 1
fi
# Print the MD5 hex digest of the text passed as $1.
row_digest() {
  echo -n "$1" | md5sum | awk '{print $1}'
}
MD5_1=$(row_digest "$sr_data")
MD5_2=$(row_digest "$tx_data")
echo "MD5 of data from sr_data: $MD5_1"
echo "MD5 of data from tx_data: $MD5_2"
# Differing digests mean the archived row is not identical to the source row.
if [ "$MD5_1" != "$MD5_2" ]; then
  echo "【ERROR】数据MD5码对比不一致"
  exit 1
fi
echo "数据MD5码对比一致"
# Dump the archived range of the source table to a local SQL file (data only,
# complete INSERT statements). The rows are deleted from the source only when
# the dump command succeeded; otherwise the script stops with status 1.
archive_filter="$DATE_FIELD >= '$MIN_DATE' and $DATE_FIELD <= '$MAX_DATE'"
mysqldump -h$DB_HOST1 -P$PORT1 -u$DB_USER1 -p$DB_PASS1 \
  --databases $DB_NAME1 --tables $SOURCE_TABLE \
  --where="$archive_filter" \
  --skip-add-drop-table --no-create-info --complete-insert \
  > /home/archived_data/$SOURCE_TABLE-$TODAY_DATE.sql || exit 1
# Dump succeeded: remove the same range from the source table.
mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "DELETE FROM $DB_NAME1.$SOURCE_TABLE where $archive_filter"
# Post-archive verification.
# Count the source rows again, capped by TBL_NEW_TIME so that rows inserted in
# the meantime do not distort the comparison.
END_TBL_TOTAL_LINE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD <= '$TBL_NEW_TIME'")
case "$END_TBL_TOTAL_LINE" in
  "") echo "【ERROR】获取归档后行数失败!" && exit 1 ;;
  *) echo "归档后数据行数为:$END_TBL_TOTAL_LINE" ;;
esac
# Rows that disappeared from the source table.
DIFFERENCE=$((START_TBL_TOTAL_LINE - END_TBL_TOTAL_LINE))
# Expected: rows before - rows after == rows found in the archive table.
if ! [ "$DIFFERENCE" -eq "$SO_COUNT" ]; then
  echo "【ERROR】校验异常,$TODAY_DATE已归档数据与生产库差值不相等,生产库差值行数为:$DIFFERENCE,已归档数据行数为:$SO_COUNT"
  exit 1
fi
echo "校验正常,$TODAY_DATE已归档数据与生产库差值数据相同"
归档数据库(全归档)
#!/bin/bash
############################## User-editable settings ##############################
# Source table and archive (backup) table; both use the same name by default.
SOURCE_TABLE="表名"
BACKUP_TABLE="表名"
# Name of the date/time column.
DATE_FIELD="时间字段名"
# ID or otherwise unique column, used for ordering when timestamps can repeat.
# Leave it empty to fall back to DATE_FIELD.
ID_FIELD="ID字段或唯一字段"
# Ordering column: ID_FIELD when it is set, DATE_FIELD otherwise.
FIELD_NAME=${ID_FIELD:-$DATE_FIELD}
echo "FIELD_NAME参数为:$FIELD_NAME"
############################## Database connections ################################
# Connection settings of the archive destination.
# (No trailing blank inside the value: it would become part of the host name as
# soon as the variable is expanded inside quotes.)
DB_HOST="数据库连接地址"
DB_USER="用户名"
DB_PASS="密码"
DB_NAME="库名"
PORT="端口"
# Connection settings of the archive source.
DB_HOST1="数据库连接地址"
DB_USER1="用户名"
DB_PASS1="密码"
DB_NAME1="库名"
PORT1="端口"
############################## End of settings #####################################
# Today's date; the rest of the script uses it in the dump file name and in log
# messages.
TODAY_DATE=$(date +%Y-%m-%d)
# Newest DATE_FIELD value currently in the source table. It caps the before/after
# row counts so rows inserted while the script runs do not skew the comparison.
TBL_NEW_TIME=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT $DATE_FIELD FROM $DB_NAME1.$SOURCE_TABLE order by $DATE_FIELD DESC limit 1")
# Row count of the source table before archiving (capped by TBL_NEW_TIME).
START_TBL_TOTAL_LINE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD <= '$TBL_NEW_TIME'")
# This number is only consumed after rows have been deleted, so refuse to start
# without a valid value instead of discovering the problem afterwards.
if ! [[ "$START_TBL_TOTAL_LINE" =~ ^[0-9]+$ ]]; then
  echo "【ERROR】获取归档前行数失败!"
  exit 1
fi
# Oldest and newest DATE_FIELD values of the whole source table (full archive,
# no cut-off). The condition is spelled out as IS NOT NULL instead of the bare
# column name that was previously used as the WHERE expression.
MIN_DATE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT MIN($DATE_FIELD) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD IS NOT NULL")
MAX_DATE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT MAX($DATE_FIELD) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD IS NOT NULL")
echo "$MIN_DATE"
echo "$MAX_DATE"
# Empty output means the query itself failed.
if [[ -z "$MIN_DATE" || -z "$MAX_DATE" ]]; then
  echo "【ERROR】获取归档日期范围失败!"
  exit 1
fi
# The mysql client prints SQL NULL as the text "NULL": the table holds no dated
# rows, so there is nothing to archive.
if [[ "$MIN_DATE" == "NULL" || "$MAX_DATE" == "NULL" ]]; then
  echo "没有需要归档的数据,脚本退出"
  exit 0
fi
# Calendar day of MIN_DATE; the copy loop starts from this day.
if ! CURRENT_DATE=$(date -d "$MIN_DATE" +%Y-%m-%d); then
  echo "【ERROR】无法解析最小日期:$MIN_DATE"
  exit 1
fi
# Copy the archive range into the backup table one calendar day at a time, in
# batches of BATCH_SIZE rows.
BATCH_SIZE=100000
OFFSET=0
# Without a start day and an upper bound the loop below cannot be bounded.
if [[ -z "$CURRENT_DATE" || -z "$MAX_DATE" ]]; then
  echo "【ERROR】归档日期范围为空,无法执行数据插入"
  exit 1
fi
# "not greater than" rather than "<": the last day is still copied when MAX_DATE
# is a bare date without a time part.
while [[ ! "$CURRENT_DATE" > "$MAX_DATE" ]]; do
  if ! NEXT_DATE=$(date -d "$CURRENT_DATE +1 days" +%Y-%m-%d); then
    echo "【ERROR】无法计算下一天日期:$CURRENT_DATE"
    exit 1
  fi
  # Rows of the current day that also fall inside [MIN_DATE, MAX_DATE].
  DAY_FILTER="$DATE_FIELD >= '$CURRENT_DATE' AND $DATE_FIELD < '$NEXT_DATE' AND $DATE_FIELD >= '$MIN_DATE' AND $DATE_FIELD <= '$MAX_DATE'"
  # Number of source rows for this day; it bounds the batch loop, which
  # otherwise has no way to know when the day is exhausted.
  DAY_ROWS=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE WHERE $DAY_FILTER")
  if ! [[ "$DAY_ROWS" =~ ^[0-9]+$ ]]; then
    echo "【ERROR】获取 $CURRENT_DATE 源数据行数失败"
    exit 1
  fi
  OFFSET=0
  while (( OFFSET < DAY_ROWS )); do
    # The source is read through jdbc_catalog on the destination connection
    # (presumably an external catalog configured there; verify on the server).
    # ORDER BY keeps LIMIT/OFFSET pages disjoint; without a defined order
    # consecutive batches may repeat or skip rows.
    if ! mysql -h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME -e "
INSERT INTO $BACKUP_TABLE
SELECT * FROM jdbc_catalog.$DB_NAME1.$SOURCE_TABLE
WHERE $DAY_FILTER
ORDER BY ${FIELD_NAME:-$DATE_FIELD}
LIMIT $BATCH_SIZE OFFSET $OFFSET;
"; then
      echo "【ERROR】$CURRENT_DATE 数据插入失败(OFFSET=$OFFSET)"
      exit 1
    fi
    OFFSET=$((OFFSET + BATCH_SIZE))
  done
  # Move on to the next day.
  CURRENT_DATE=$NEXT_DATE
done
echo "数据插入完成。"
# Row counts over the archived range [MIN_DATE, MAX_DATE]: SO_COUNT is read from
# the archive table, DE_COUNT from the source table.
range_clause="where $DATE_FIELD >= '$MIN_DATE' and $DATE_FIELD <= '$MAX_DATE' "
# Left unquoted on purpose so the connection values are word-split exactly as
# they are when written inline on the command.
archive_conn=(-h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME)
origin_conn=(-h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1)
SO_COUNT=$(mysql "${archive_conn[@]}" -se "SELECT count(1) FROM $DB_NAME.$BACKUP_TABLE $range_clause")
DE_COUNT=$(mysql "${origin_conn[@]}" -se "SELECT count(1) FROM $DB_NAME1.$SOURCE_TABLE $range_clause")
printf '%s\n' "Count from SO_COUNT: $SO_COUNT" "Count from DE_COUNT: $DE_COUNT"
# A mismatch (or a value that is not a number) stops the run here, before the
# later delete step is reached.
if ! [ "$SO_COUNT" -eq "$DE_COUNT" ]; then
  echo "【ERROR】归档数据总行数对比不一致"
  exit 1
fi
echo "归档数据总行数对比一致"
# Spot check: fetch one row from the archive and the matching row from the
# source, then compare their MD5 digests.
# Sampling window: the 90 days that end at the calendar day of MAX_DATE.
DATA_DATE1=$(date -d "$MAX_DATE" +%Y-%m-%d)
DATA_DATE2=$(date -d "$DATA_DATE1 -90 days" +%Y-%m-%d)
# Shared tail of both queries: the 10th row of the window in descending
# FIELD_NAME order.
sample_tail="where $DATE_FIELD < '$DATA_DATE1' and $DATE_FIELD >= '$DATA_DATE2' order by $FIELD_NAME DESC LIMIT 1 OFFSET 9"
sr_data=$(mysql -h $DB_HOST -u $DB_USER -p$DB_PASS -P$PORT -D $DB_NAME -se "SELECT * FROM $DB_NAME.$BACKUP_TABLE $sample_tail")
tx_data=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT * FROM $DB_NAME1.$SOURCE_TABLE $sample_tail")
echo "随机获取数据的最大时间范围为$DATA_DATE1"
echo "随机获取数据的最小时间范围为$DATA_DATE2"
# Both samples must be present before they can be compared.
if [ -n "$sr_data" ] && [ -n "$tx_data" ]; then
  echo "获取第10行归档数据为:$sr_data"
  echo "获取第10行源数据为:$tx_data"
else
  echo "【ERROR】获取第10行源数据/归档数据时,发现数据为空" && exit 1
fi
# Print the MD5 hex digest of the text passed as $1.
row_digest() {
  echo -n "$1" | md5sum | awk '{print $1}'
}
MD5_1=$(row_digest "$sr_data")
MD5_2=$(row_digest "$tx_data")
echo "MD5 of data from sr_data: $MD5_1"
echo "MD5 of data from tx_data: $MD5_2"
# Differing digests mean the archived row is not identical to the source row.
if [ "$MD5_1" != "$MD5_2" ]; then
  echo "【ERROR】数据MD5码对比不一致"
  exit 1
fi
echo "数据MD5码对比一致"
# Dump the archived range of the source table to a local SQL file (data only,
# complete INSERT statements). The rows are deleted from the source only when
# the dump command succeeded; otherwise the script stops with status 1.
archive_filter="$DATE_FIELD >= '$MIN_DATE' and $DATE_FIELD <= '$MAX_DATE'"
mysqldump -h$DB_HOST1 -P$PORT1 -u$DB_USER1 -p$DB_PASS1 \
  --databases $DB_NAME1 --tables $SOURCE_TABLE \
  --where="$archive_filter" \
  --skip-add-drop-table --no-create-info --complete-insert \
  > /home/archived_data/$SOURCE_TABLE-$TODAY_DATE.sql || exit 1
# Dump succeeded: remove the same range from the source table.
mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "DELETE FROM $DB_NAME1.$SOURCE_TABLE where $archive_filter"
# Post-archive verification.
# Count the source rows again, capped by TBL_NEW_TIME so that rows inserted in
# the meantime do not distort the comparison.
END_TBL_TOTAL_LINE=$(mysql -h $DB_HOST1 -u $DB_USER1 -p$DB_PASS1 -P$PORT1 -D $DB_NAME1 -se "SELECT count(*) FROM $DB_NAME1.$SOURCE_TABLE where $DATE_FIELD <= '$TBL_NEW_TIME'")
case "$END_TBL_TOTAL_LINE" in
  "") echo "【ERROR】获取归档后行数失败!" && exit 1 ;;
  *) echo "归档后数据行数为:$END_TBL_TOTAL_LINE" ;;
esac
# Rows that disappeared from the source table.
DIFFERENCE=$((START_TBL_TOTAL_LINE - END_TBL_TOTAL_LINE))
# Expected: rows before - rows after == rows found in the archive table.
if ! [ "$DIFFERENCE" -eq "$SO_COUNT" ]; then
  echo "【ERROR】校验异常,$TODAY_DATE已归档数据与生产库差值不相等,生产库差值行数为:$DIFFERENCE,已归档数据行数为:$SO_COUNT"
  exit 1
fi
echo "校验正常,$TODAY_DATE已归档数据与生产库差值数据相同"