#!/bin/bash
# 清理脚本 — host log/temp cleanup script (run from cron as root).
PATH="/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin:/apps/svr/docker/bin"
if [ $UID -ne 0 ] ; then
echo "You need to be root to run this script"
exit 1
fi
BEGIN_TIMESTAMP=$( date + % s)
renice + 19 - p $$ >/ dev/ null 2>&1
ionice - c3 - p $$ >/ dev/ null 2>&1
LOG_DIR="/apps/logs/cron"
LOG_FILE="${LOG_DIR}/clean_logs.$(date +" % Y% m% d").log"
DAY_LOCK_FILENAME="clean_logs.$(date +" % Y% m% d").lock"
DAY_LOCK_FILE="${LOG_DIR}/${DAY_LOCK_FILENAME}"
PROC_LOCK_FILENAME="clean_logs.lock"
PROC_LOCK_FILE="/var/run/${PROC_LOCK_FILENAME}"
CURRENT_H=`date + "%H" | sed 's/^0//' `
CURRENT_M=`date + "%M" | sed 's/^0//' `
LOGS_ROOT_DIR="/apps/logs/log_receiver"
TMP_DATA_ROOT_DIR="/apps/dat/kubernetes-pods"
AI_TMP_ROOT_DIR="/apps/tmp/ai-noah.vip.vip.com"
SPARK_TMP_ROOT_DIR="/apps/tmp/spark-lab"
GC_RATE="70"
TARGET_RATE="60"
HAS_LARGE_LOG_FILES="/tmp/cache_kubelet_has_large_log_file"
if [ [ $CURRENT_M -ge 0 ] ] && [ [ $CURRENT_M -le 5 ] ]
then
exit
elif [ [ $CURRENT_M -ge 56 ] ] && [ [ $CURRENT_M -le 59 ] ]
then
exit
fi
function log( ) {
echo "$( date + '[%F %T]' ) $@" >> $LOG_FILE
}
function lock( ) {
[ [ - e $( readlink - f $PROC_LOCK_FILE ) ] ] | | unlink $PROC_LOCK_FILE >/ dev/ null 2>&1
ln - s / proc/ $$/ cmdline $PROC_LOCK_FILE >/ dev/ null 2>&1
if [ [ $? -eq 0 ] ]
then
trap "unlock 0" 0
trap "unlock 1" 1
trap "unlock 2" 2
trap "unlock 3" 3
trap "unlock 15" 15
return 0
else
log "[CONFLICT] Failed to get lock."
exit 1
fi
}
function unlock( ) {
SIGNAL_NUM=$1
[ [ $SIGNAL_NUM -gt 0 ] ] && log "[EXIT] Receive signal $SIGNAL_NUM . (Cost: $( get_cost_seconds) s)"
[ [ "$( readlink - f $PROC_LOCK_FILE ) " == "/proc/$$/cmdline" ] ] && unlink $PROC_LOCK_FILE >/ dev/ null 2>&1
exit 0
}
lock
[ [ - d $LOG_DIR ] ] | | mkdir - p $LOG_DIR
ROOT_SIZE=`df - BG | awk '$NF == "/"{print $2*1}' `
if [ [ - z $ROOT_SIZE ] ]
then
log "ERROR: Failed to get the size of root partition"
exit 1
fi
if [ [ $ROOT_SIZE -gt 500 ] ]
then
APP_LOGS_GC_DAYS=5
else
APP_LOGS_GC_DAYS=4
fi
function get_cost_seconds( ) {
END_TIMESTAMP=$( date + % s)
COST_SECONDS=$( expr $END_TIMESTAMP - $BEGIN_TIMESTAMP )
[ [ $COST_SECONDS -lt 0 ] ] && COST_SECONDS=0
echo $COST_SECONDS
}
function log_used_info( ) {
USED_INFO=`df - lh | awk '$NF == "/" {print "Partition: /, Size: "$2", Used: "$3", Avail: "$4", Use%: "$5}' `
log $USED_INFO
}
function rm_domain_dir( ) {
DIR_ORIGINAL_PATH="$1 "
if [ [ - d "${DIR_ORIGINAL_PATH}" ] ]
then
DIR_REAL_PATH=`readlink - f "$DIR_ORIGINAL_PATH " `
if echo "${DIR_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}$|^/apps/dat/kubernetes-pods/[^/]{1,}$" >/ dev/ null 2>&1
then
if [ [ "${DIR_REAL_PATH}" ! = "/apps/logs/log_receiver/ai-noah.vip.vip.com" ] ]
then
rm - rf "${DIR_REAL_PATH}"
log "Remove directory: ${DIR_REAL_PATH}"
fi
else
log "[DENIED] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
fi
else
log "[NOT EXIST] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
fi
}
function rm_pod_dir( ) {
DIR_ORIGINAL_PATH="$1 "
if [ [ - d "${DIR_ORIGINAL_PATH}" ] ]
then
DIR_REAL_PATH=`readlink - f "$DIR_ORIGINAL_PATH " `
if echo "${DIR_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}/[^/]{1,}$|^/apps/dat/kubernetes-pods/[^/]{1,}/[^/]{1,}$" >/ dev/ null 2>&1
then
rm - rf "${DIR_REAL_PATH}"
log "Remove directory: ${DIR_REAL_PATH}"
else
log "[DENIED] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
fi
else
log "[NOT EXIST] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
fi
}
function rm_pod_file( ) {
FILE_ORIGINAL_PATH="$1 "
if [ [ - f "${FILE_ORIGINAL_PATH}" ] ]
then
FILE_REAL_PATH=`readlink - f "$FILE_ORIGINAL_PATH " `
if echo "${FILE_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}/[^/]{1,}/[^/]{1,}$" >/ dev/ null 2>&1
then
log_truncate "${FILE_REAL_PATH}"
else
log "[DENIED] Failed to delete file: ${FILE_ORIGINAL_PATH}"
fi
else
log "[NOT EXIST] Failed to delete file: ${FILE_ORIGINAL_PATH}"
fi
}
function clean_logs_dir_pod( ) {
if systemctl status vip- docker >/ dev/ null 2>&1 && [ [ - n $( docker ps - qa) ] ]
then
LOGS_GC_DAYS=10
LOGS_GC_SECONDS=$( ( 86400 * ${ LOGS_GC_DAYS} ) )
NOW_TIMESTAMP=$( date + % s)
let LOGS_GC_TIMESTAMP=$( ( $NOW_TIMESTAMP - $LOGS_GC_SECONDS ) )
POD_NAME_LIST=$( docker inspect -- format "{{index .Config.Labels \" io. kubernetes. pod. name\" }}" $( docker ps - qa) | sort | uniq | sed '/^$/d' )
if [ [ $LOGS_GC_SECONDS -gt 0 ] ]
then
while read LINE
do
read DOMAIN_NAME POD_DIR_NAME < <( echo "$LINE " )
if [ [ - n $DOMAIN_NAME ] ] && [ [ - n $POD_DIR_NAME ] ]
then
POD_NAME=`echo $POD_DIR_NAME | awk - F@ '{print $1}' `
echo $POD_NAME_LIST | tr ' ' '\n' | grep "^${POD_NAME}$" >/ dev/ null 2>&1
if [ [ $? -ne 0 ] ] && [ [ - n $POD_NAME ] ]
then
LOG_DIR_MODIFY_TIME=$( stat ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} | grep 'Modify:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
LOG_DIR_CHANGE_TIME=$( stat ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} | grep 'Change:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
LOG_DIR_MODIFY_TIMESTAMP=$( date - d "`echo $LOG_DIR_MODIFY_TIME `" +%s)
LOG_DIR_CHANGE_TIMESTAMP=$(date -d " `echo $LOG_DIR_CHANGE_TIME `" +%s)
[[ $LOG_DIR_CHANGE_TIMESTAMP -ge $LOG_DIR_MODIFY_TIMESTAMP ]] && LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_CHANGE_TIMESTAMP || LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_MODIFY_TIMESTAMP
if [[ $LOG_DIR_LAST_USED_TIMESTAMP -le $LOGS_GC_TIMESTAMP ]]
then
LOGS_MODIFIED_NUM=$( find ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} - type f \( - mtime - ${ LOGS_GC_DAYS} -or - ctime - ${ LOGS_GC_DAYS} \) 2>/dev/null | wc -l)
LOGS_OPENED_NUM=$(find ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME} -type f -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; | wc -l)
#如果Pod日志目录下,10天内没有修改过过任何文件,且当前没有任何文件被进程打开,则删除此目录
if [[ $LOGS_MODIFIED_NUM -eq 0 ]] && [[ $LOGS_OPENED_NUM -eq 0 ]]
then
find ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}/ -type f -delete 2>/dev/null | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
rm_pod_dir " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
else
log " [UNEXPIRED|OPENED] Skip directory: ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
fi
#如果目录${LOGS_ROOT_DIR}/${DOMAIN_NAME}为空,则删除此目录
if [[ -z $(ls -A " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} ") ]]
then
rm_domain_dir " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} "
fi
else
log " [UNEXPIRED] Skip directory: ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
fi
else
log " [CONTAINER EXIST] Skip directory: ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
fi
else
#如果${LOGS_ROOT_DIR}/${DOMAIN_NAME}为目录,且此目录为空
if [[ -n $DOMAIN_NAME ]] && [[ -d " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} " ]] && [[ -z $(ls -A " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} ") ]]
then
LOG_DIR_MODIFY_TIME=$( stat ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} | grep 'Modify:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
LOG_DIR_CHANGE_TIME=$( stat ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} | grep 'Change:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
LOG_DIR_MODIFY_TIMESTAMP=$(date -d " `echo $LOG_DIR_MODIFY_TIME `" +%s)
LOG_DIR_CHANGE_TIMESTAMP=$(date -d " `echo $LOG_DIR_CHANGE_TIME `" +%s)
[[ $LOG_DIR_CHANGE_TIMESTAMP -ge $LOG_DIR_MODIFY_TIMESTAMP ]] && LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_CHANGE_TIMESTAMP || LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_MODIFY_TIMESTAMP
INTERVAL=$( ( $NOW_TIMESTAMP - $LOG_DIR_LAST_USED_TIMESTAMP ) )
#如果此目录12小时内没有修改过过,并且目录为空,则删除此目录
if [[ $INTERVAL -ge 43200 ]] && [[ -z $(ls -A " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} ") ]]
then
rm_domain_dir " ${ LOGS_ROOT_DIR} / ${ DOMAIN_NAME} "
fi
fi
fi
done< <(find ${LOGS_ROOT_DIR}/* -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '{print $5 ,$6 }')
fi
fi
}
#清理pod的临时数据目录(pod销毁后即可删除)
function clean_tmp_data_dir_pod() {
if systemctl status vip-docker >/dev/null 2>&1 && [[ -n $( docker ps - qa) ]]
then
POD_NAME_LIST=$(docker inspect --format " { { index . Config. Labels \"io.kubernetes.pod.name\" } } " $( docker ps - qa) | sort | uniq | sed '/^$/d')
while read LINE
do
read DOMAIN_NAME POD_DIR_NAME < <(echo " $LINE ")
if [[ -n $DOMAIN_NAME ]] && [[ -n $POD_DIR_NAME ]]
then
POD_NAME=`echo $POD_DIR_NAME | awk -F@ '{print $1 }'`
echo $POD_NAME_LIST | tr ' ' '\n' | grep " ^${ POD_NAME} $" >/dev/null 2>&1
if [[ $? -ne 0 ]] && [[ -n $POD_NAME ]]
then
rm_pod_dir " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
#如果目录${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}为空,则删除此目录
if [[ -z $(ls -A " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} ") ]]
then
rm_domain_dir " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} "
fi
else
log " [CONTAINER EXIST] Skip directory: ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} / ${ POD_DIR_NAME} "
fi
else
#如果${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}为目录,且此目录为空
if [[ -n $DOMAIN_NAME ]] && [[ -d " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} " ]] && [[ -z $(ls -A " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} ") ]]
then
rm_domain_dir " ${ TMP_DATA_ROOT_DIR} / ${ DOMAIN_NAME} "
fi
fi
done< <(find ${TMP_DATA_ROOT_DIR}/* -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '{print $5 ,$6 }')
fi
}
#清理ai集群的pod tmp目录
function clean_tmp_dir_ai() {
if [[ -d $AI_TMP_ROOT_DIR ]] && systemctl status vip-docker >/dev/null 2>&1 && [[ -n $( docker ps - qa) ]]
then
TMP_GC_DAYS=1
TMP_GC_SECONDS=$( ( 86400 * ${ TMP_GC_DAYS} ) )
NOW_TIMESTAMP=$( date + % s)
let TMP_GC_TIMESTAMP=$( ( $NOW_TIMESTAMP - $TMP_GC_SECONDS ) )
POD_NAME_LIST=$(docker inspect --format " { { index . Config. Labels \"io.kubernetes.pod.name\" } } " $( docker ps - qa) | sort | uniq | sed '/^$/d')
if [[ $TMP_GC_SECONDS -gt 0 ]]
then
while read TMP_DIR_NAME
do
POD_NAME=`echo $TMP_DIR_NAME | awk -F@ '{print $1 }'`
TMP_DIR=" ${ AI_TMP_ROOT_DIR} / ${ TMP_DIR_NAME} "
echo $POD_NAME_LIST | tr ' ' '\n' | grep " ^${ POD_NAME} $" >/dev/null 2>&1
if [[ $? -ne 0 ]] && [[ -n $POD_NAME ]]
then
TMP_DIR_MODIFY_TIME=$( stat ${ TMP_DIR} | grep 'Modify:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
TMP_DIR_CHANGE_TIME=$( stat ${ TMP_DIR} | grep 'Change:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
TMP_DIR_MODIFY_TIMESTAMP=$(date -d " `echo $TMP_DIR_MODIFY_TIME `" +%s)
TMP_DIR_CHANGE_TIMESTAMP=$(date -d " `echo $TMP_DIR_CHANGE_TIME `" +%s)
[[ $TMP_DIR_CHANGE_TIMESTAMP -ge $TMP_DIR_MODIFY_TIMESTAMP ]] && TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_CHANGE_TIMESTAMP || TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_MODIFY_TIMESTAMP
if [[ $TMP_DIR_LAST_USED_TIMESTAMP -le $TMP_GC_TIMESTAMP ]]
then
if echo " ${ TMP_DIR} " | egrep " / apps/ tmp/ ai- noah. vip. vip. com/ [ ^/ ] { 1, } $" >/dev/null 2>&1
then
rm -rf " ${ TMP_DIR} "
log " Remove directory: ${ TMP_DIR} "
else
log " [DENIED] Failed to remove directory: ${ TMP_DIR} "
fi
else
log " [UNEXPIRED] Skip directory: ${ TMP_DIR} "
fi
else
log " [CONTAINER EXIST] Skip directory: ${ TMP_DIR} "
fi
done< <(find ${AI_TMP_ROOT_DIR}/ -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '$5 != " " {print $5 }')
fi
fi
}
function clean_tmp_dir_spark() {
if [[ -d $SPARK_TMP_ROOT_DIR ]]
then
TMP_GC_DAYS=1
TMP_GC_SECONDS=$( ( 86400 * ${ TMP_GC_DAYS} ) )
NOW_TIMESTAMP=$( date + % s)
let TMP_GC_TIMESTAMP=$( ( $NOW_TIMESTAMP - $TMP_GC_SECONDS ) )
if [[ $TMP_GC_SECONDS -gt 0 ]]
then
while read TMP_DIR
do
TMP_DIR_MODIFY_TIME=$( stat ${ TMP_DIR} | grep 'Modify:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
TMP_DIR_CHANGE_TIME=$( stat ${ TMP_DIR} | grep 'Change:' | grep - oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}' )
TMP_DIR_MODIFY_TIMESTAMP=$(date -d " `echo $TMP_DIR_MODIFY_TIME `" +%s)
TMP_DIR_CHANGE_TIMESTAMP=$(date -d " `echo $TMP_DIR_CHANGE_TIME `" +%s)
[[ $TMP_DIR_CHANGE_TIMESTAMP -ge $TMP_DIR_MODIFY_TIMESTAMP ]] && TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_CHANGE_TIMESTAMP || TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_MODIFY_TIMESTAMP
if [[ $TMP_DIR_LAST_USED_TIMESTAMP -le $TMP_GC_TIMESTAMP ]]
then
if echo " ${ TMP_DIR} " | egrep " / apps/ tmp/ spark- lab/ spark- [ ^/ ] { 1, } $" >/dev/null 2>&1
then
rm -rf " ${ TMP_DIR} "
log " Remove directory: ${ TMP_DIR} "
else
log " [DENIED] Failed to remove directory: ${ TMP_DIR} "
fi
else
log " [UNEXPIRED] Skip directory: ${ TMP_DIR} "
fi
done< <(ls -1d ${SPARK_TMP_ROOT_DIR}/spark-* 2>/dev/null)
fi
TMP_GC_SIZE_GB=350
TMP_GC_SIZE_KB=$( ( 1024 * 1024 * ${ TMP_GC_SIZE_GB} ) )
while read TMP_DIR_SIZE TMP_DIR
do
if echo " ${ TMP_DIR} " | egrep " / apps/ tmp/ spark- lab/ spark- [ ^/ ] { 1, } $" >/dev/null 2>&1
then
rm -rf " ${ TMP_DIR} "
log " Remove directory: ${ TMP_DIR} ( $( ( $TMP_DIR_SIZE / 1024 / 1024) ) GB) "
else
log " [DENIED] Failed to remove directory: ${ TMP_DIR} "
fi
done< <(du -s ${SPARK_TMP_ROOT_DIR}/spark-* 2>/dev/null | awk '$1 > TMP_GC_SIZE_KB' TMP_GC_SIZE_KB=$TMP_GC_SIZE_KB )
fi
}
#清理系统日志
function clean_logs_sys() {
log_used_info
log " Start deleting logs of system. "
find /var/log/ -maxdepth 1 -type f -name " *- [ 0- 9] * " -mtime +89 -ctime +89 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理定时任务的日志
function clean_logs_cron() {
log_used_info
log " Start deleting logs of cron. "
find /apps/logs/cron/ -type f -mtime +89 -ctime +89 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
find /apps/logs/cron/ -type f -name " clean_logs. * . lock" ! -name " ${ DAY_LOCK_FILENAME} " ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理openvswitch的日志
function clean_logs_openvswitch() {
log_used_info
log " Start deleting logs of openvswitch. "
find /var/log/openvswitch/ -type f -name " * . gz" -mtime +29 -ctime +29 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理netplugin的日志
function clean_logs_netplugin() {
log_used_info
log " Start deleting logs of netplugin. "
find /apps/logs/netplugin/ -type f -name " netplugin. log-* " -mtime +29 -ctime +29 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理docker的日志
function clean_logs_docker() {
log_used_info
log " Start deleting logs of docker. "
find /apps/logs/docker/ -type f -name " dockerd. log-* " -mtime +29 -ctime +29 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理kubernetes的日志
function clean_logs_kubernetes() {
log_used_info
log " Start deleting logs of kubernetes. "
#通过chmod 644命令先将日志目录下的软链文件的Modify Time和Change Time改为当前时间,避免软链文件被find命令删除
find /apps/logs/kubernetes/ -type l 2>/dev/null | xargs -I {} readlink -f {} | xargs -I {} chmod 644 {}
find /apps/logs/kubernetes/ -type f -mtime +29 -ctime +29 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
#通过chmod 644命令先将日志目录下的软链文件的Modify Time和Change Time改为当前时间,避免软链文件被find命令删除
find /apps/logs/kubelet/ -type l 2>/dev/null | xargs -I {} readlink -f {} | xargs -I {} chmod 644 {}
find /apps/logs/kubelet/ -type f -mtime +20 -ctime +20 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理smart-agent的日志
function clean_logs_smart_agent() {
log_used_info
log " Start deleting logs of smart- agent. "
find /apps/logs/smart-agent/ -type f -name " * . log. [ 0- 9] * " -mtime +6 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理vfilebeat的日志
function clean_logs_vfilebeat() {
log_used_info
log " Start deleting logs of vfilebeat. "
find /apps/logs/vfilebeat/ -type f -name " vfilebeat* . log. * " -mtime +6 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理osp proxy的日志
function clean_logs_osp_proxy() {
log_used_info
LOGS_GC_DAYS=$1
[[ $LOGS_GC_DAYS -lt 1 ]] && LOGS_GC_DAYS=1
let LOGS_GC_DAYS=$[ LOGS_GC_DAYS - 1 ]
log " Start deleting logs of osp proxy. "
find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name " * . log" -mtime +${LOGS_GC_DAYS} ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#清理coredump的日志
function clean_logs_coredump() {
log_used_info
log " Start deleting logs of coredump. "
find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name " core-* -* -* " -mtime +1 ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
}
#按天删除业务日志
function clean_logs_receiver_by_day() {
log_used_info
LOGS_GC_PATH=$1
LOGS_GC_DAYS=$2
[[ $LOGS_GC_DAYS -lt 1 ]] && LOGS_GC_DAYS=1
let LOGS_GC_DAYS=$[ LOGS_GC_DAYS - 1 ]
if echo $LOGS_GC_PATH | grep " / apps/ logs/ log_receiver" >/dev/null 2>&1
then
log " Start deleting old log files in ${ LOGS_GC_PATH} ( - mtime + ${ LOGS_GC_DAYS} ) "
find ${LOGS_GC_PATH} -type f -mtime +${LOGS_GC_DAYS} ! -name " * . pid" ! -name " osp- osp. out" ! -name " hs_err* . log" ! -name " osp* _class. his" ! -name " java_error* . log" ! -name " gc -* . log" ! -name " catalina. out* " ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
log " Start deleting old log files in ${ LOGS_GC_PATH} ( - mtime + ${ LOGS_GC_DAYS} - size + 50M) "
find ${LOGS_GC_PATH} -type f -mtime +${LOGS_GC_DAYS} \( -name " osp- osp. out" -or -name " catalina. out* " \) -size +50M ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
log " Start deleting old log files in ${ LOGS_GC_PATH} ( - mtime + 1 - size + 3G) "
find ${LOGS_GC_PATH} -type f -mtime +1 \( -name " osp- osp. out" -or -name " catalina. out* " \) -size +3G ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
fi
}
#按分钟删除业务日志
function clean_logs_receiver_by_minute() {
log_used_info
LOGS_GC_PATH=$1
LOGS_GC_MINUTES=$2
if echo $LOGS_GC_PATH | grep " / apps/ logs/ log_receiver" >/dev/null 2>&1
then
log " Start deleting old log files in ${ LOGS_GC_PATH} ( - mmin + ${ LOGS_GC_MINUTES} ) "
find ${LOGS_GC_PATH} -type f -mmin +${LOGS_GC_MINUTES} ! -name " * . pid" ! -name " osp- osp. out" ! -name " hs_err* . log" ! -name " osp* _class. his" ! -name " java_error* . log" ! -name " gc -* . log" ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
fi
}
#获取根分区的使用率
function get_used_rate() {
USED_RATE=`df -l 2>/dev/null | awk '$NF == " / " {print $5 *1}'`
if [[ -n $USED_RATE ]]
then
echo $USED_RATE
else
log " ERROR: Failed to get used rate of root pattition. "
exit 1
fi
}
#获取日志量最大的Pod目录
function get_target_log_dir() {
TARGET_LOG_DIR=`du -s ${LOGS_ROOT_DIR}/*/*/ /apps/logs/osp/ 2>/dev/null | sort -rn | head -n 1 | awk '{print $2 }'`
if [[ -n $USED_RATE ]]
then
echo $TARGET_LOG_DIR
else
log " ERROR: Failed to get target log dir . "
exit 1
fi
}
function clean_logs_unimportant() {
log_used_info
#清理trace日志
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
find ${LOGS_ROOT_DIR}/*/*/trace/logs/ -type f -name " trace- log. out* " ! -name " trace- log. out" ! -name " trace- log. out. 1" ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
USED_RATE=`get_used_rate`
fi
#清空文件大小大于1GB的osp-osp.out和catalina.out
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
while read file_path
do
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log_truncate $file_path
else
break
fi
USED_RATE=`get_used_rate`
done< <(find $LOGS_ROOT_DIR -type f \( -name " osp- osp. out" -or -name " catalina. out* " \) -size +1G -exec ls -l {} \; 2>/dev/null | sort -rn -k5 | awk '{print $NF }')
fi
#每个pod的目录下最多只保留一个coredump文件
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
while read pod_dir
do
while read file_path
do
rm_pod_file ${file_path}
done< <(find ${pod_dir} -maxdepth 1 -type f -name " core-* -* -* " -exec ls -l --time-style=" +% s" {} \; 2>/dev/null | sort -n -k6 | sed '$d ' | awk '{print $NF }')
done< <(find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name " core-* -* -* " 2>/dev/null | sed " s| / [ ^/ ] * $| | " | sort | uniq)
USED_RATE=`get_used_rate`
fi
#每台宿主机只保留最近的四个coredump文件
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
while read file_path
do
rm_pod_file ${file_path}
done< <(find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name " core-* -* -* " -exec ls -l --time-style=" +% s" {} \; 2>/dev/null | sort -rn -k6 | sed '1,4d' | awk '{print $NF }')
USED_RATE=`get_used_rate`
fi
#删除比较大的osp proxy日志
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name " * . log" -mtime +1 -size +10G ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name " * . log" -mtime +0 -size +20G ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
USED_RATE=`get_used_rate`
fi
#删除2天内没有被修改且当前没有被任何进程打开的access日志
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
find ${LOGS_ROOT_DIR}/*/*/ -type f -mtime +1 -name " * access* . log" ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
USED_RATE=`get_used_rate`
fi
#清空文件大小大于70G的osp proxy日志
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
while read file_path
do
log_truncate ${file_path}
done< <(find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name " * . log" -size +70G 2>/dev/null)
USED_RATE=`get_used_rate`
fi
}
#按天删除业务日志
function log_gc_by_day() {
log_used_info
USED_RATE=`get_used_rate`
let APP_LOGS_GC_DAYS=$[ APP_LOGS_GC_DAYS -1 ]
#按天依次删除所有Pod的最早的业务日志,直到根分区的使用率小余变量TARGET_RATE设置的值或者只剩下最近2天的业务日志
while [[ $USED_RATE -ge $TARGET_RATE ]] && [[ $APP_LOGS_GC_DAYS -ge 2 ]]
do
clean_logs_receiver_by_day $LOGS_ROOT_DIR $APP_LOGS_GC_DAYS
let APP_LOGS_GC_DAYS=$[ APP_LOGS_GC_DAYS -1 ]
USED_RATE=`get_used_rate`
done
#循环查找日志量最大的Pod日志目录,按天依次删除此Pod日志目录下最早的业务日志,直到根分区的使用率小余变量TARGET_RATE设置的值或者只剩下最近1天的业务日志
declare -A PODS_ARRAY
PROXY_GC_DAYS=2
while [[ $USED_RATE -ge $TARGET_RATE ]]
do
TARGET_LOG_DIR=`get_target_log_dir`
if [[ $TARGET_LOG_DIR != " / apps/ logs/ osp/ " ]]
then
TARGET_KEY=`echo $TARGET_LOG_DIR | awk -F/ '{print $( NF- 2) " _"$( NF- 1) }'`
if [[ -z ${PODS_ARRAY[$TARGET_KEY ]} ]]
then
PODS_ARRAY[$TARGET_KEY ]=$APP_LOGS_GC_DAYS
fi
if [[ ${PODS_ARRAY[$TARGET_KEY ]} -gt 0 ]]
then
clean_logs_receiver_by_day $TARGET_LOG_DIR ${PODS_ARRAY[$TARGET_KEY ]}
PODS_ARRAY[$TARGET_KEY ]=`expr ${PODS_ARRAY[$TARGET_KEY ]} - 1`
else
break
fi
else
if [[ $PROXY_GC_DAYS -ge 0 ]]
then
clean_logs_osp_proxy $PROXY_GC_DAYS
let PROXY_GC_DAYS=$[ PROXY_GC_DAYS - 1 ]
else
break
fi
fi
sleep 1
USED_RATE=`get_used_rate`
done
}
#循环查找日志量最大的pod目录,按最后修改时间依次删除此Pod目录下最近没有被修改且没有被任何进程打开的文件,直到根分区的使用率小余变量TARGET_RATE设置的值
function log_gc_by_minute() {
log_used_info
USED_RATE=`get_used_rate`
APP_LOGS_GC_HOUR=21
while [[ $USED_RATE -ge $TARGET_RATE ]]
do
TARGET_LOG_DIR=`get_target_log_dir`
if [[ $TARGET_LOG_DIR != " / apps/ logs/ osp/ " ]]
then
if [[ $APP_LOGS_GC_HOUR -ge 1 ]]
then
let APP_LOGS_GC_MINUTE=$[ APP_LOGS_GC_HOUR * 60 ]
clean_logs_receiver_by_minute $TARGET_LOG_DIR $APP_LOGS_GC_MINUTE
let APP_LOGS_GC_HOUR=$[ APP_LOGS_GC_HOUR - 4 ]
else
break
fi
else
break
fi
sleep 1
USED_RATE=`get_used_rate`
done
check_vfilebeat_agent_fd
APP_LOGS_GC_MINUTE=45
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log " systemctl stop vip- vfilebeat- agent"
systemctl stop vip-vfilebeat-agent
while [[ $USED_RATE -ge $TARGET_RATE ]]
do
TARGET_LOG_DIR=`get_target_log_dir`
if [[ $TARGET_LOG_DIR != " / apps/ logs/ osp/ " ]]
then
if [[ $APP_LOGS_GC_MINUTE -ge 5 ]]
then
clean_logs_receiver_by_minute $TARGET_LOG_DIR $APP_LOGS_GC_MINUTE
let APP_LOGS_GC_MINUTE=$[ APP_LOGS_GC_MINUTE - 20 ]
else
break
fi
else
break
fi
sleep 1
USED_RATE=`get_used_rate`
done
log " systemctl start vip- vfilebeat- agent"
systemctl start vip-vfilebeat-agent
fi
}
#获取文件的大小,单位为GB,若文件大小小余1GB,则返回0
function get_file_size() {
FILE_PATH=$1
FILE_SIZE_BYTES=`ls -l $FILE_PATH 2>/dev/null | awk '{print $5 }'`
[[ -z $FILE_SIZE_BYTES ]] && FILE_SIZE_BYTES=0
FILE_SIZE_GB=`echo " $FILE_SIZE_BYTES / 1073741824" | bc`
echo $FILE_SIZE_GB
}
#使用truncate命令逐步清空大文件(可避免一次清空超大文件导致IO飙升的问题)
function log_truncate() {
FILE_PATH=$1
FILE_SIZE=`get_file_size $FILE_PATH `
TARGET_SIZE=3
while [[ $TARGET_SIZE -ne 0 ]]
do
let TARGET_SIZE=$[ FILE_SIZE - 5 ]
[[ $TARGET_SIZE -lt 0 ]] && TARGET_SIZE=0
log " Truncate file: ${ FILE_PATH} ( ${ FILE_SIZE} GB - > ${ TARGET_SIZE} GB) "
truncate -s ${TARGET_SIZE}G $FILE_PATH
#如果文件已被清空且没有被任何进程打开则删除此文件
if [[ $TARGET_SIZE -eq 0 ]]
then
find ${FILE_PATH} ! -exec fuser -s " { } " 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo " $( date + '[%F %T]' ) Delete file:" {} >>$LOG_FILE 2>&1
fi
sleep 1
FILE_SIZE=`get_file_size $FILE_PATH `
done
}
#按文件大小排序,依次清空最大的文件,直到根分区的使用率小余变量TARGET_RATE设置的值
function log_gc_by_file() {
LOOP=0
while read file_path
do
USED_RATE=`get_used_rate`
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log_truncate $file_path
else
break
fi
let LOOP++
[[ $LOOP -ge 50 ]] && break
done< <(find /apps/logs/ -type f -size +1G -exec ls -l {} \; 2>/dev/null | sort -rn -k5 | awk '{print $NF }')
}
function check_vfilebeat_agent_fd() {
VFILEBEAT_AGENT_PIDS=$( pidof / apps/ svr/ vfilebeat/ vfilebeat 2>/ dev/ null | tr ' ' ',' )
if [[ -n $VFILEBEAT_AGENT_PIDS ]]
then
DELETED_FD_NUM=$(eval " ls - l / proc/ { $VFILEBEAT_AGENT_PIDS } / fd | grep - c deleted")
if [[ $DELETED_FD_NUM -ge 3 ]]
then
DELETED_FILES=`eval " sudo ls - l / proc/ { $VFILEBEAT_AGENT_PIDS } / fd | grep deleted" | awk '{print $( NF- 1) }' | tr '\n' '|' | sed 's/|$//'`
log " vfilebeat unreleased file descriptor: $DELETED_FD_NUM "
log " vfilebeat unreleased file path: $DELETED_FILES "
log " systemctl restart vip- vfilebeat- agent"
systemctl restart vip-vfilebeat-agent
fi
fi
}
log " ===================================================================================================="
# 每天凌晨3点执行一次
if [[ $CURRENT_H -eq 3 ]] && [[ ! -e $DAY_LOCK_FILE ]]
then
touch $DAY_LOCK_FILE
if [[ $? -eq 0 ]]
then
check_vfilebeat_agent_fd
clean_logs_sys
clean_logs_cron
clean_logs_openvswitch
clean_logs_netplugin
clean_logs_docker
clean_logs_kubernetes
clean_logs_smart_agent
clean_logs_vfilebeat
clean_logs_osp_proxy 3
clean_logs_receiver_by_day $LOGS_ROOT_DIR $APP_LOGS_GC_DAYS
clean_logs_dir_pod
clean_tmp_data_dir_pod
clean_logs_coredump
clean_tmp_dir_spark
clean_tmp_dir_ai
check_vfilebeat_agent_fd
fi
fi
USED_RATE=`get_used_rate`
if [[ $USED_RATE -ge $GC_RATE ]]
then
check_vfilebeat_agent_fd
clean_tmp_dir_spark
clean_tmp_dir_ai
clean_logs_unimportant
# 清理已销毁容器的临时数据目录
clean_tmp_data_dir_pod
USED_RATE=`get_used_rate`
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log_gc_by_day
USED_RATE=`get_used_rate`
fi
check_vfilebeat_agent_fd
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log_gc_by_minute
USED_RATE=`get_used_rate`
fi
check_vfilebeat_agent_fd
if [[ $USED_RATE -ge $TARGET_RATE ]]
then
log_gc_by_file
fi
check_vfilebeat_agent_fd
fi
if [[ -f $HAS_LARGE_LOG_FILES ]]
then
log " Start deleting log files larger than 80GB. "
while read FILE_ORIGINAL_PATH
do
FILE_REAL_PATH=`readlink -f " $FILE_ORIGINAL_PATH "`
if echo " ${ FILE_REAL_PATH} " | egrep " ^/ apps/ logs/ " >/dev/null 2>&1
then
log_truncate " ${ FILE_REAL_PATH} "
else
log " [DENIED] Failed to delete file: ${ FILE_ORIGINAL_PATH} "
fi
done< <(find /apps/logs/ -type f -size +80G 2>/dev/null)
rm -f $HAS_LARGE_LOG_FILES
check_vfilebeat_agent_fd
fi
log_used_info
log " Done! ( Cost: $( get_cost_seconds) s) "
# ---- xcall 脚本: run one command on every host (linux01..linux03) ----
cmd=$*
if [ ! - n "$cmd " ]
then
echo "command can not be null !"
exit
fi
user=`whoami`
for ( ( host=1; host<=3; host++ ) )
do
echo "================current host is linux0$host ================="
echo "--> excute command \" $cmd \""
ssh $user @linux0$host "source /etc/profile; $cmd "
done
echo "excute successfully !"
# (end of xcall script)
# ---- xsync 文件同步脚本: rsync a file/dir to every host (linux01..linux03) ----
pcount=$
if ( ( pcount==0) ) ; then
echo no args;
exit ;
fi
p1=$1
fname=`basename $p1 `
echo fname=$fname
pdir=`d - P $( dirname $p1 ) ; pwd `
echo pdir=$pdir
user=`whoami`
for ( ( host=1; host<4; host++ ) ) ; do
echo -- --- --- --- --- - linux0$host -- --- --- --- --- --
rsync - rvl $pdir / $fname $user @linux0$host :$pdir
done