一、需求
线上有几个实例,经常磁盘告警,之前每次人工删除,效率实在不高。
二、解决方法
登录跳板机执行脚本删除,不过在删除前,需要导出binary log日志和库表的数据,最后把备份的数据传到hdfs。
三、脚本
-
读取配置文件
1.1 mysql.conf,哪些实例,哪些表名
1
2
3
4
5
6
7
|
MYSQL_HOME=/usr/local/mysql/bin/mysql
INCLUDE_MYSQL=(mysql4:
5507
mysql5:
5508
mysql6:
5509
mysql7:
5510
mysql7:
5511
)
EXCLUDE_DB_GA10=(dc_15 dc_17 dc_44 dc_49 dc_88 dc_183 dc_279 dc_490 dc_624 dc_643 dc_903 dc_906 dc_908 dc_954 dc_1099 dc_1100 dc_1167 dc_1214 dc_1463 dc_1464 dc_2444 dc_2445 dc_2695)
APPID_REG=[
0
-9A-Za-z]{
32
,
33
}
DELETE_TABLE_GA10=(dc_datacenter_cache dc_day_report_cache_basic dc_day_report_cache_income dc_day_report_cache_cumu dc_day_report_cache_channel ${APPID_REG_REG}_dc_distributed_everyday front_${APPID_REG}_dc_everyday2 ${APPID_REG}_dc_everyhour ${APPID_REG}_dc_everyday ${APPID_REG}_dc_distributed_everyday2 ${APPID_REG}_dc_distributedweek_everyweek2 ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_pay_distributed_everyday ${APPID_REG}_dc_custom_retain_by_day front_${APPID_REG}_dc_custom_retain_by_day ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_equipment_distributed_by_day ${APPID_REG}_dc_fp_distributed_everyday ${APPID_REG}_dc_event_by_day ${APPID_REG}_dc_event_attr_by_day ${APPID_REG}_dc_error_report_detail ${APPID_REG}_dc_error_report_dist_hour ${APPID_REG}_dc_player_30dayvalue front_${APPID_REG}_dc_player_30dayvalue ${APPID_REG}_dc_player_30day_arpu front_${APPID_REG}_dc_player_30day_arpu ${APPID_REG}_dc_whale_player_day ${APPID_REG}_dc_uid_retain_by_day front_${APPID_REG}_dc_uid_retain_by_day ${APPID_REG}_dc_task_everyday ${APPID_REG}_dc_level_everyday ${APPID_REG}_dc_app_ring ${APPID_REG}_dc_error_report_dist_sys ${APPID_REG}_dc_error_report_detail_sys ${APPID_REG}_dc_error_report_dist_user ${APPID_REG}_dc_error_report_detail_user ${APPID_REG}_dc_rollserver_player_by_day ${APPID_REG}_dc_rollserver_income_by_day ${APPID_REG}_dc_rollserver_retain front_${APPID_REG}_dc_rollserver_retain ${APPID_REG}_dc_tag_everyday ${APPID_REG}_dc_tag_level_outflow ${APPID_REG}_dc_tag_retain_by_day ${APPID_REG}_dc_everyweek ${APPID_REG}_dc_everymonth ${APPID_REG}_dc_distributed_everyweek ${APPID_REG}_dc_distributed_everyweek2 ${APPID_REG}_dc_distributed_everymonth ${APPID_REG}_dc_distributed_everymonth2 ${APPID_REG}_dc_custom_retain_by_week ${APPID_REG}_dc_custom_retain_by_month front_${APPID_REG}_dc_custom_retain_by_week front_${APPID_REG}_dc_custom_retain_by_month)
EXPIRE_LOGS_DAYS=
21
|
1.2 backup.conf
1
2
3
4
|
JAVA_HOME=/usr/java/jdk1.
7
.0_25
LOCAL_FILES=(/home/yaolihong/yao/backup/bak.tar.gz)
BUSINESS_TYPE=ga
CUSTOM_SUB_DIR=mysql
|
2.backup_tools.sh脚本
主要将备份压缩好的数据发送到hdfs,这个脚本会在clean_data0.sh和clean_data1.sh的最后被调用。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
#!/bin/sh
#
set
-x
#check user
WHO_AM_I=`whoami`
if
[
"hadoop"
== $WHO_AM_I ]; then
echo
"Error: you should not use hadoop user to back files, use another user instead please..."
exit
1
fi
shellLocation=`dirname $
0
`
shellLocation=`cd
"$shellLocation"
; pwd`
. $shellLocation/conf/backup.conf
HADOOP_CMD=
"$shellLocation/bin/hadoop-1.0.4/bin/hadoop"
#check
if
empty
REQUIED_PARAMS=(JAVA_HOME LOCAL_FILES BUSINESS_TYPE CUSTOM_SUB_DIR)
for
PARAM
in
${REQUIED_PARAMS[@]}
do
if
[
"X${!PARAM}"
=
"X"
]; then
echo
"Error: ${PARAM} is not set..."
exit
1
fi
done
export JAVA_HOME
CURRENT_TIME=`date +%Y%m%d-%H_%M_%S`
CURRENT_LOG_FILE=$shellLocation/logs/backup.$CURRENT_TIME.log
#
get
time
CURRENT_YEAR=`date +%Y -d
'1 hours ago'
`
CURRENT_MONTH=`date +%m -d
'1 hours ago'
`
CURRENT_DAY=`date +%d -d
'1 hours ago'
`
LAST_HOUR=`date +%H -d
'1 hours ago'
`
HOUR_STR_FOR_HDFS=
"$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY/$LAST_HOUR"
BACKUP_BASIC_PATH=
"/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR"
BACKUP_DAY_PATH=
"$BACKUP_BASIC_PATH/$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY"
# 判断是否有传入路径,有则以传入的路径为准
if
[ $# =
1
]; then
BACKUP_DAY_PATH=
"/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR/$1"
fi
#check
if
hdfs dir exist and mkdir
$HADOOP_CMD fs -test -e $BACKUP_DAY_PATH
if
[ $? -ne
0
]; then
$HADOOP_CMD fs -mkdir $BACKUP_DAY_PATH >> $CURRENT_LOG_FILE
2
>&
1
fi
HOST_NAME=`/bin/hostname`
IS_ANY_FAILED=
"N"
for
LOCAL_FILE
in
${LOCAL_FILES[@]}
do
#check
if
local file exist
if
[ ! -f $LOCAL_FILE ]; then
IS_ANY_FAILED=
"Y"
echo
"Error: Local file not exist : $LOCAL_FILE"
>> $CURRENT_LOG_FILE
continue
fi
#put local file to hdfs
LOCL_FILE_NAME=`/bin/basename $LOCAL_FILE`
HDFS_FILE_NAME=${LOCL_FILE_NAME}_${HOST_NAME}_${CURRENT_TIME}
$HADOOP_CMD fs -put $LOCAL_FILE $BACKUP_DAY_PATH/$HDFS_FILE_NAME >> $CURRENT_LOG_FILE
2
>&
1
#
if
put failed, we should not touch a done file
if
[ $? -ne
0
]; then
IS_ANY_FAILED=
"Y"
echo
"Error: put file to hdfs failed : $LOCAL_FILE"
>> $CURRENT_LOG_FILE
continue
fi
done
#
delete
log
10
days ago
find $shellLocation/logs/ -mtime +
10
-
delete
if
[ $IS_ANY_FAILED =
"Y"
]; then
exit
1
fi
#
set
+x
|
3.清除数据库脚本clean_data0.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
#!/bin/bash
# 加载配置文件
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $
0
`
SHELLLOCATION=`cd
"${SHELLLOCATION}"
; pwd`
HADOOP_CMD=
"${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"
echo
"选择要清理的数据库实例:"
select MYSQL
in
${INCLUDE_MYSQL[@]};
do
break
done
echo
"您选择要清理的实例是:$MYSQL"
MYSQL_HOST=`echo $MYSQL | awk -F
':'
'{print $1}'
`
MYSQL_PORT=`echo $MYSQL | awk -F
':'
'{print $2}'
`
read -p
"数据库账号:"
MYSQL_USER
read -s -p
"数据库密码:"
MYSQL_PWD
TODAY=`date +
"%Y%m%d"
`
if
[ ! -d ./logs/${MYSQL_HOST}/${MYSQL_PORT} ]; then
mkdir -p ./logs/${MYSQL_HOST}/${MYSQL_PORT}
fi
# 定义一个查询数据库的方法
query_mysql()
{
mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} --
default
-character-
set
=utf8 -N -e
"$*"
| sed
"s/^//;s/$//"
}
# 定义一个打印日志的方法
printlog()
{
echo -e $*
echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/mysql.${TODAY}.log
}
# 定义一个打印删除记录日志的方法
print_delete_log()
{
echo -e $*
echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/
delete
.${TODAY}.log
}
DB_SET=`query_mysql
"show databases"
| egrep -E
"dc_[0-9]|ga_[0-9]"
`
FAIL_BAK_DB=()
for
DB
in
${DB_SET};
do
# 如果是游戏分析
1.0
的数据库并且该数据库不是需要过滤的数据库,则开始清理
if
[[
"${DB}"
=~ ^dc_[
0
-
9
]*$ && !
" ${EXCLUDE_DB_GA10[@]} "
=~
" ${DB} "
]]; then
DIR=data/${MYSQL_HOST}/${MYSQL_PORT}/${DB}/${TODAY}
if
[ ! -d ./backup/${DIR} ]; then
mkdir -p ./backup/${DIR}
printlog
"开始扫描$DB"
WAIT_DELETE_TABLE=()
for
TABLE
in
`query_mysql
"use ${DB}; show tables"
`;
do
# 遍历需要清理的表
for
INCLUDE_TABLE
in
${DELETE_TABLE_GA10[@]};
do
# 把appid替换成正则表达式
INCLUDE_TABLE=`echo ${INCLUDE_TABLE} | sed
's/^/\^/;s/$/\$/'
`
if
[[ ${TABLE} =~ ${INCLUDE_TABLE} ]]; then
TOTAL_ROW=`query_mysql
"select count(*) from ${DB}.${TABLE}"
`
DELETE_ROW=
0
DELETE_COLUMN=
""
# 如果是日表
if
[[ ! `query_mysql
"desc ${DB}.${TABLE} StatiTime"
` ==
""
]]; then
DELETE_COLUMN=
"StatiTime"
DELETE_ROW=`query_mysql
"select count(*) from ${DB}.${TABLE} where StatiTime < unix_timestamp(date_add(now(), interval -6 month))"
`
#如果是周表或者月表
elif [[ ! `query_mysql
"desc ${DB}.${TABLE} EndDate"
` ==
""
]]; then
DELETE_COLUMN=
"EndDate"
DELETE_ROW=`query_mysql
"select count(*) from ${DB}.${TABLE} where EndDate < unix_timestamp(date_add(now(), interval -6 month))"
`
fi
if
[ ${DELETE_ROW} -gt
0
]; then
WAIT_DELETE_TABLE+=(${TABLE}.${DELETE_COLUMN})
printlog
"${DB}.${TABLE} \t ${TOTAL_ROW} \t ${DELETE_ROW}"
fi
TOTAL_ROW=
0
DELETE_ROW=
0
DELETE_COLUMN=
""
break
fi
done
done
if
[ ${#WAIT_DELETE_TABLE[@]} -gt
0
]; then
read -p
"是否清理历史数据:[Yes/No]"
SURE
if
[ ${SURE} =
"Yes"
]; then
print_delete_log
"开始清除${DB}"
for
WAIT_DELETE
in
${WAIT_DELETE_TABLE[@]};
do
DEL_TABLE=`echo ${WAIT_DELETE} | awk -F
'.'
'{print $1}'
`
DEL_COLUMN=`echo ${WAIT_DELETE} | awk -F
'.'
'{print $2}'
`
mysqldump -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} ${DB} ${DEL_TABLE} -w
"${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))"
> ./backup/${DIR}/${DEL_TABLE}.sql
DEL_ROW=`query_mysql
"set sql_log_bin=0; delete from ${DB}.${DEL_TABLE} where ${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))"
`
print_delete_log
"${DEL_TABLE} \t ${DEL_ROW}"
done
echo
"开始打包导出的sql文件!"
tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/*.sql
echo
"开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
sh backup_tools.sh
"${DIR}"
if
[[ $? -ne
0
|| `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` =
""
]]; then
print_delete_log
"==========${DB}备份失败,请手动备份!=========="
FAIL_BAK_DB+=(${MYSQL}.${DB})
fi
fi
fi
print_delete_log
"==========备份失败的数据库有:${FAIL_BAK_DB[@]}=========="
unset WAIT_DELETE_TABLE
fi
done
|
4.清除日志脚本clean_data1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
#!/bin/bash
# 加载配置文件
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $
0
`
SHELLLOCATION=`cd
"${SHELLLOCATION}"
; pwd`
HADOOP_CMD=
"${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"
echo
"选择要清理的数据库实例:"
select MYSQL
in
${INCLUDE_MYSQL[@]};
do
break
done
echo
"您选择要清理的实例是:${MYSQL}"
MYSQL_HOST=`echo ${MYSQL} | awk -F
':'
'{print $1}'
`
MYSQL_PORT=`echo ${MYSQL} | awk -F
':'
'{print $2}'
`
read -p
"主机账号:"
USER
read -s -p
"主机密码:"
PWD
TARGET_DAY=`date -d
"-${EXPIRE_LOGS_DAYS} day"
+%Y-%m-%d`
MTIME=`expr ${EXPIRE_LOGS_DAYS} -
1
`
TODAY=`date +%Y%m%d`
DIR=binlog/${MYSQL_HOST}/${MYSQL_PORT}/${TODAY}
if
[ ! -d ./backup/${DIR} ]; then
mkdir -p ./backup/${DIR}
fi
rm -f ~/.ssh/known_hosts
for
LOG
in
`expect << EOF
spawn ssh ${USER}@${MYSQL_HOST}
"find /data1/mysql/log${MYSQL_PORT}/mysql-bin.* -mtime +${MTIME} | xargs ls --full-time | grep -v ${TARGET_DAY}"
expect
"no)?"
send
"yes\r"
expect
"password:"
send
"${PWD}\r"
set
timeout -
1
expect eof
EOF`;
do
if
[[ `echo ${LOG} | egrep -E
"mysql-bin\.[0-9]{1,}"
` !=
""
]]; then
rm -f ~/.ssh/known_hosts
expect << EOF
spawn scp ${USER}@${MYSQL_HOST}:${LOG} backup/${DIR}
expect
"no)?"
send
"yes\r"
expect
"password:"
send
"${PWD}\r"
set
timeout -
1
expect eof
EOF
fi
done
echo
"开始打包导出的sql文件!"
tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/mysql-bin.*
echo
"开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
sh backup_tools.sh
"${DIR}"
if
[[ $? -ne
0
|| `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` =
""
]]; then
echo
"==========备份失败,请手动备份!=========="
else
read -p
"数据库账号:"
MYSQL_USER
read -s -p
"数据库密码:"
MYSQL_PWD
mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} -e
"PURGE BINARY LOGS BEFORE CURRENT_DATE -INTERVAL ${EXPIRE_LOGS_DAYS} DAY"
echo
"备份成功"
fi
|
本文转自 zouqingyun 51CTO博客,原文链接:http://blog.51cto.com/zouqingyun/1739972,如需转载请自行联系原作者