Assorted Linux scripts

Cleanup script

#!/bin/bash
# Author: long07.wang
# Date  : 2020-10-10

PATH="/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin:/apps/svr/docker/bin"

if [ $UID -ne 0 ];then
    echo "You need to be root to run this script"
    exit 1
fi

BEGIN_TIMESTAMP=$(date +%s)

#Lower the CPU and IO priority of this script to the minimum
renice +19 -p $$ >/dev/null 2>&1
ionice -c3 -p $$ >/dev/null 2>&1

LOG_DIR="/apps/logs/cron"
LOG_FILE="${LOG_DIR}/clean_logs.$(date +"%Y%m%d").log"
DAY_LOCK_FILENAME="clean_logs.$(date +"%Y%m%d").lock"
DAY_LOCK_FILE="${LOG_DIR}/${DAY_LOCK_FILENAME}"
PROC_LOCK_FILENAME="clean_logs.lock"
PROC_LOCK_FILE="/var/run/${PROC_LOCK_FILENAME}"
CURRENT_H=`date +"%H" | sed 's/^0//'`
CURRENT_M=`date +"%M" | sed 's/^0//'`
LOGS_ROOT_DIR="/apps/logs/log_receiver"
TMP_DATA_ROOT_DIR="/apps/dat/kubernetes-pods"
AI_TMP_ROOT_DIR="/apps/tmp/ai-noah.vip.vip.com"
SPARK_TMP_ROOT_DIR="/apps/tmp/spark-lab"
GC_RATE="70"
TARGET_RATE="60"
HAS_LARGE_LOG_FILES="/tmp/cache_kubelet_has_large_log_file"

# Do not run log cleanup during minutes 0-5 and 56-59 of every hour
if [[ $CURRENT_M -ge 0 ]] && [[ $CURRENT_M -le 5 ]]
then
    exit
elif [[ $CURRENT_M -ge 56 ]] && [[ $CURRENT_M -le 59 ]]
then
    exit
fi

#Write a log entry
function log() {
    echo "$(date +'[%F %T]') $@" >> $LOG_FILE
}

#Acquire the lock
function lock() {
    [[ -e $(readlink -f $PROC_LOCK_FILE) ]] || unlink $PROC_LOCK_FILE >/dev/null 2>&1
    ln -s /proc/$$/cmdline $PROC_LOCK_FILE >/dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
        #trap "unlock" 0 1 2 3 15
        trap "unlock  0"  0
        trap "unlock  1"  1
        trap "unlock  2"  2
        trap "unlock  3"  3
        trap "unlock 15" 15
        return 0
    else
        log "[CONFLICT] Failed to get lock."
        exit 1
    fi
}

#Release the lock
function unlock() {
    SIGNAL_NUM=$1
    [[ $SIGNAL_NUM -gt 0 ]] && log "[EXIT] Receive signal $SIGNAL_NUM. (Cost: $(get_cost_seconds)s)"
    [[ "$(readlink -f $PROC_LOCK_FILE)" == "/proc/$$/cmdline" ]] && unlink $PROC_LOCK_FILE >/dev/null 2>&1
    exit 0
}

lock

#Create the log directory
[[ -d $LOG_DIR ]] || mkdir -p $LOG_DIR

#Get the size of the root partition
ROOT_SIZE=`df -BG | awk '$NF == "/"{print $2*1}'`
if [[ -z $ROOT_SIZE ]]
then
    log "ERROR: Failed to get the size of root partition"
    exit 1
fi

#If the root partition is smaller than 500GB, keep only 4 days of application logs; otherwise keep 5 days
if [[ $ROOT_SIZE -gt 500 ]]
then
    APP_LOGS_GC_DAYS=5
else
    APP_LOGS_GC_DAYS=4
fi

#Calculate elapsed seconds
function get_cost_seconds() {
    END_TIMESTAMP=$(date +%s)
    COST_SECONDS=$(expr $END_TIMESTAMP - $BEGIN_TIMESTAMP)
    [[ $COST_SECONDS -lt 0 ]] && COST_SECONDS=0
    echo $COST_SECONDS
}

#Log the usage of the root partition
function log_used_info() {
    USED_INFO=`df -lh | awk '$NF == "/" {print "Partition: /, Size: "$2", Used: "$3", Avail: "$4", Use%: "$5}'`
    log $USED_INFO
}

#Safely remove a domain log directory (only subdirectories of /apps/logs/log_receiver/ and /apps/dat/kubernetes-pods/ may be removed)
function rm_domain_dir() {
    DIR_ORIGINAL_PATH="$1"
    if [[ -d "${DIR_ORIGINAL_PATH}" ]]
    then
        DIR_REAL_PATH=`readlink -f "$DIR_ORIGINAL_PATH"`
        if echo "${DIR_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}$|^/apps/dat/kubernetes-pods/[^/]{1,}$" >/dev/null 2>&1
        then
            if [[ "${DIR_REAL_PATH}" != "/apps/logs/log_receiver/ai-noah.vip.vip.com" ]]
            then
                rm -rf "${DIR_REAL_PATH}"
                log "Remove directory: ${DIR_REAL_PATH}"
            fi
        else
            log "[DENIED] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
        fi
    else
        log "[NOT EXIST] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
    fi
}

#Safely remove a pod log directory (only subdirectories of /apps/logs/log_receiver/<domain_name>/ and /apps/dat/kubernetes-pods/<domain_name>/ may be removed)
function rm_pod_dir() {
    DIR_ORIGINAL_PATH="$1"
    if [[ -d "${DIR_ORIGINAL_PATH}" ]]
    then
        DIR_REAL_PATH=`readlink -f "$DIR_ORIGINAL_PATH"`
        if echo "${DIR_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}/[^/]{1,}$|^/apps/dat/kubernetes-pods/[^/]{1,}/[^/]{1,}$" >/dev/null 2>&1
        then
            rm -rf "${DIR_REAL_PATH}"
            log "Remove directory: ${DIR_REAL_PATH}"
        else
            log "[DENIED] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
        fi
    else
        log "[NOT EXIST] Failed to remove directory: ${DIR_ORIGINAL_PATH}"
    fi
}

#Safely delete files in a pod log directory (only files under /apps/logs/log_receiver/<domain_name>/<pod_name>/ may be deleted)
function rm_pod_file() {
    FILE_ORIGINAL_PATH="$1"
    if [[ -f "${FILE_ORIGINAL_PATH}" ]]
    then
        FILE_REAL_PATH=`readlink -f "$FILE_ORIGINAL_PATH"`
        if echo "${FILE_REAL_PATH}" | egrep "^/apps/logs/log_receiver/[^/]{1,}/[^/]{1,}/[^/]{1,}$" >/dev/null 2>&1
        then
            log_truncate "${FILE_REAL_PATH}"
        else
            log "[DENIED] Failed to delete file: ${FILE_ORIGINAL_PATH}"
        fi
    else
        log "[NOT EXIST] Failed to delete file: ${FILE_ORIGINAL_PATH}"
    fi
}

#Clean up pod log directories (a pod's log directory is kept for 10 days after the pod is gone)
function clean_logs_dir_pod() {
    if systemctl status vip-docker >/dev/null 2>&1 && [[ -n $(docker ps -qa) ]]
    then
        LOGS_GC_DAYS=10
        LOGS_GC_SECONDS=$(( 86400 * ${LOGS_GC_DAYS} ))

        NOW_TIMESTAMP=$(date +%s)
        let LOGS_GC_TIMESTAMP=$(( $NOW_TIMESTAMP - $LOGS_GC_SECONDS ))

        POD_NAME_LIST=$(docker inspect --format "{{index .Config.Labels \"io.kubernetes.pod.name\" }}" $(docker ps -qa) | sort | uniq | sed '/^$/d')

        if [[ $LOGS_GC_SECONDS -gt 0 ]]
        then
            while read LINE
            do
                read DOMAIN_NAME POD_DIR_NAME < <(echo "$LINE")
                if [[ -n $DOMAIN_NAME ]] && [[ -n $POD_DIR_NAME ]]
                then
                    POD_NAME=`echo $POD_DIR_NAME | awk -F@ '{print $1}'`
                    echo $POD_NAME_LIST | tr ' ' '\n' | grep "^${POD_NAME}$" >/dev/null 2>&1
                    if [[ $? -ne 0 ]] && [[ -n $POD_NAME ]]
                    then
                        LOG_DIR_MODIFY_TIME=$(stat ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME} | grep 'Modify:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                        LOG_DIR_CHANGE_TIME=$(stat ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME} | grep 'Change:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                        LOG_DIR_MODIFY_TIMESTAMP=$(date -d "`echo $LOG_DIR_MODIFY_TIME`" +%s)
                        LOG_DIR_CHANGE_TIMESTAMP=$(date -d "`echo $LOG_DIR_CHANGE_TIME`" +%s)
                        [[ $LOG_DIR_CHANGE_TIMESTAMP -ge $LOG_DIR_MODIFY_TIMESTAMP ]] && LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_CHANGE_TIMESTAMP || LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_MODIFY_TIMESTAMP
                        if [[ $LOG_DIR_LAST_USED_TIMESTAMP -le $LOGS_GC_TIMESTAMP ]]
                        then
                            LOGS_MODIFIED_NUM=$(find ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME} -type f \( -mtime -${LOGS_GC_DAYS} -or -ctime -${LOGS_GC_DAYS} \) 2>/dev/null | wc -l)
                            LOGS_OPENED_NUM=$(find ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME} -type f -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; | wc -l)

                            #If no file in the pod log directory has been modified within the last 10 days and no file is currently opened by any process, remove the directory
                            if [[ $LOGS_MODIFIED_NUM -eq 0 ]] && [[ $LOGS_OPENED_NUM -eq 0 ]]
                            then
                                find ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}/ -type f -exec echo {} \; -delete 2>/dev/null | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
                                rm_pod_dir "${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                            else
                                log "[UNEXPIRED|OPENED] Skip directory: ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                            fi

                            #If ${LOGS_ROOT_DIR}/${DOMAIN_NAME} is empty, remove it
                            if [[ -z $(ls -A "${LOGS_ROOT_DIR}/${DOMAIN_NAME}") ]]
                            then
                                rm_domain_dir "${LOGS_ROOT_DIR}/${DOMAIN_NAME}"
                            fi
                        else
                            log "[UNEXPIRED] Skip directory: ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                        fi
                    else
                        log "[CONTAINER EXIST] Skip directory: ${LOGS_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                    fi
                else
                    #If ${LOGS_ROOT_DIR}/${DOMAIN_NAME} is a directory and it is empty
                    if [[ -n $DOMAIN_NAME ]] && [[ -d "${LOGS_ROOT_DIR}/${DOMAIN_NAME}" ]] && [[ -z $(ls -A "${LOGS_ROOT_DIR}/${DOMAIN_NAME}") ]]
                    then
                        LOG_DIR_MODIFY_TIME=$(stat ${LOGS_ROOT_DIR}/${DOMAIN_NAME} | grep 'Modify:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                        LOG_DIR_CHANGE_TIME=$(stat ${LOGS_ROOT_DIR}/${DOMAIN_NAME} | grep 'Change:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                        LOG_DIR_MODIFY_TIMESTAMP=$(date -d "`echo $LOG_DIR_MODIFY_TIME`" +%s)
                        LOG_DIR_CHANGE_TIMESTAMP=$(date -d "`echo $LOG_DIR_CHANGE_TIME`" +%s)
                        [[ $LOG_DIR_CHANGE_TIMESTAMP -ge $LOG_DIR_MODIFY_TIMESTAMP ]] && LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_CHANGE_TIMESTAMP || LOG_DIR_LAST_USED_TIMESTAMP=$LOG_DIR_MODIFY_TIMESTAMP
                        INTERVAL=$(( $NOW_TIMESTAMP - $LOG_DIR_LAST_USED_TIMESTAMP ))
                        #If the directory has not been modified within 12 hours and is still empty, remove it
                        if [[ $INTERVAL -ge 43200 ]] && [[ -z $(ls -A "${LOGS_ROOT_DIR}/${DOMAIN_NAME}") ]]
                        then
                            rm_domain_dir "${LOGS_ROOT_DIR}/${DOMAIN_NAME}"
                        fi
                    fi
                fi
            done< <(find ${LOGS_ROOT_DIR}/* -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '{print $5,$6}')
        fi
    fi
}

#Clean up pod temporary data directories (they can be removed as soon as the pod is destroyed)
function clean_tmp_data_dir_pod() {
    if systemctl status vip-docker >/dev/null 2>&1 && [[ -n $(docker ps -qa) ]]
    then
        POD_NAME_LIST=$(docker inspect --format "{{index .Config.Labels \"io.kubernetes.pod.name\" }}" $(docker ps -qa) | sort | uniq | sed '/^$/d')
        while read LINE
        do
            read DOMAIN_NAME POD_DIR_NAME < <(echo "$LINE")
            if [[ -n $DOMAIN_NAME ]] && [[ -n $POD_DIR_NAME ]]
            then
                POD_NAME=`echo $POD_DIR_NAME | awk -F@ '{print $1}'`
                echo $POD_NAME_LIST | tr ' ' '\n' | grep "^${POD_NAME}$" >/dev/null 2>&1
                if [[ $? -ne 0 ]] && [[ -n $POD_NAME ]]
                then
                    rm_pod_dir "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                    #If ${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME} is empty, remove it
                    if [[ -z $(ls -A "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}") ]]
                    then
                        rm_domain_dir "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}"
                    fi
                else
                    log "[CONTAINER EXIST] Skip directory: ${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}/${POD_DIR_NAME}"
                fi
            else
                #If ${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME} is a directory and it is empty
                if [[ -n $DOMAIN_NAME ]] && [[ -d "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}" ]] && [[ -z $(ls -A "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}") ]]
                then
                    rm_domain_dir "${TMP_DATA_ROOT_DIR}/${DOMAIN_NAME}"
                fi
            fi
        done< <(find ${TMP_DATA_ROOT_DIR}/* -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '{print $5,$6}')
    fi
}

#Clean up pod tmp directories of the AI cluster
function clean_tmp_dir_ai() {
    if [[ -d $AI_TMP_ROOT_DIR ]] && systemctl status vip-docker >/dev/null 2>&1 && [[ -n $(docker ps -qa) ]]
    then
        TMP_GC_DAYS=1
        TMP_GC_SECONDS=$(( 86400 * ${TMP_GC_DAYS} ))

        NOW_TIMESTAMP=$(date +%s)
        let TMP_GC_TIMESTAMP=$(( $NOW_TIMESTAMP - $TMP_GC_SECONDS ))

        POD_NAME_LIST=$(docker inspect --format "{{index .Config.Labels \"io.kubernetes.pod.name\" }}" $(docker ps -qa) | sort | uniq | sed '/^$/d')

        if [[ $TMP_GC_SECONDS -gt 0 ]]
        then
            while read TMP_DIR_NAME
            do
                POD_NAME=`echo $TMP_DIR_NAME | awk -F@ '{print $1}'`
                TMP_DIR="${AI_TMP_ROOT_DIR}/${TMP_DIR_NAME}"
                echo $POD_NAME_LIST | tr ' ' '\n' | grep "^${POD_NAME}$" >/dev/null 2>&1
                if [[ $? -ne 0 ]] && [[ -n $POD_NAME ]]
                then
                    TMP_DIR_MODIFY_TIME=$(stat ${TMP_DIR} | grep 'Modify:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                    TMP_DIR_CHANGE_TIME=$(stat ${TMP_DIR} | grep 'Change:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                    TMP_DIR_MODIFY_TIMESTAMP=$(date -d "`echo $TMP_DIR_MODIFY_TIME`" +%s)
                    TMP_DIR_CHANGE_TIMESTAMP=$(date -d "`echo $TMP_DIR_CHANGE_TIME`" +%s)
                    [[ $TMP_DIR_CHANGE_TIMESTAMP -ge $TMP_DIR_MODIFY_TIMESTAMP ]] && TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_CHANGE_TIMESTAMP || TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_MODIFY_TIMESTAMP
                    if [[ $TMP_DIR_LAST_USED_TIMESTAMP -le $TMP_GC_TIMESTAMP ]]
                    then
                        if echo "${TMP_DIR}" | egrep "/apps/tmp/ai-noah.vip.vip.com/[^/]{1,}$" >/dev/null 2>&1
                        then
                            rm -rf "${TMP_DIR}"
                            log "Remove directory: ${TMP_DIR}"
                        else
                            log "[DENIED] Failed to remove directory: ${TMP_DIR}"
                        fi
                    else
                        log "[UNEXPIRED] Skip directory: ${TMP_DIR}"
                    fi
                else
                    log "[CONTAINER EXIST] Skip directory: ${TMP_DIR}"
                fi
            done< <(find ${AI_TMP_ROOT_DIR}/ -maxdepth 1 -type d 2>/dev/null | sort -r | awk -F/ '$5 != "" {print $5}')
        fi
    fi
}

function clean_tmp_dir_spark() {
    if [[ -d $SPARK_TMP_ROOT_DIR ]]
    then
        TMP_GC_DAYS=1
        TMP_GC_SECONDS=$(( 86400 * ${TMP_GC_DAYS} ))

        NOW_TIMESTAMP=$(date +%s)
        let TMP_GC_TIMESTAMP=$(( $NOW_TIMESTAMP - $TMP_GC_SECONDS))

        if [[ $TMP_GC_SECONDS -gt 0 ]]
        then
            while read TMP_DIR
            do
                TMP_DIR_MODIFY_TIME=$(stat ${TMP_DIR} | grep 'Modify:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                TMP_DIR_CHANGE_TIME=$(stat ${TMP_DIR} | grep 'Change:' | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}')
                TMP_DIR_MODIFY_TIMESTAMP=$(date -d "`echo $TMP_DIR_MODIFY_TIME`" +%s)
                TMP_DIR_CHANGE_TIMESTAMP=$(date -d "`echo $TMP_DIR_CHANGE_TIME`" +%s)
                [[ $TMP_DIR_CHANGE_TIMESTAMP -ge $TMP_DIR_MODIFY_TIMESTAMP ]] && TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_CHANGE_TIMESTAMP || TMP_DIR_LAST_USED_TIMESTAMP=$TMP_DIR_MODIFY_TIMESTAMP
                if [[ $TMP_DIR_LAST_USED_TIMESTAMP -le $TMP_GC_TIMESTAMP ]]
                then
                    if echo "${TMP_DIR}" | egrep "/apps/tmp/spark-lab/spark-[^/]{1,}$" >/dev/null 2>&1
                    then
                        rm -rf "${TMP_DIR}"
                        log "Remove directory: ${TMP_DIR}"
                    else
                        log "[DENIED] Failed to remove directory: ${TMP_DIR}"
                    fi
                else
                    log "[UNEXPIRED] Skip directory: ${TMP_DIR}"
                fi
            done< <(ls -1d ${SPARK_TMP_ROOT_DIR}/spark-* 2>/dev/null)
        fi

        TMP_GC_SIZE_GB=350
        TMP_GC_SIZE_KB=$(( 1024 * 1024 * ${TMP_GC_SIZE_GB} ))
        while read TMP_DIR_SIZE TMP_DIR
        do
            if echo "${TMP_DIR}" | egrep "/apps/tmp/spark-lab/spark-[^/]{1,}$" >/dev/null 2>&1
            then
                rm -rf "${TMP_DIR}"
                log "Remove directory: ${TMP_DIR} ($(($TMP_DIR_SIZE / 1024 / 1024))GB)"
            else
                log "[DENIED] Failed to remove directory: ${TMP_DIR}"
            fi
        done< <(du -s ${SPARK_TMP_ROOT_DIR}/spark-* 2>/dev/null | awk '$1 > TMP_GC_SIZE_KB' TMP_GC_SIZE_KB=$TMP_GC_SIZE_KB)
    fi
}

#Clean up system logs
function clean_logs_sys() {
    log_used_info
    log "Start deleting logs of system."
    find /var/log/ -maxdepth 1 -type f -name "*-[0-9]*" -mtime +89 -ctime +89 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up cron job logs
function clean_logs_cron() {
    log_used_info
    log "Start deleting logs of cron."
    find /apps/logs/cron/ -type f -mtime +89 -ctime +89 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
    find /apps/logs/cron/ -type f -name "clean_logs.*.lock" ! -name "${DAY_LOCK_FILENAME}" ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up openvswitch logs
function clean_logs_openvswitch() {
    log_used_info
    log "Start deleting logs of openvswitch."
    find /var/log/openvswitch/ -type f -name "*.gz" -mtime +29 -ctime +29 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up netplugin logs
function clean_logs_netplugin() {
    log_used_info
    log "Start deleting logs of netplugin."
    find /apps/logs/netplugin/ -type f -name "netplugin.log-*" -mtime +29 -ctime +29 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up docker logs
function clean_logs_docker() {
    log_used_info
    log "Start deleting logs of docker."
    find /apps/logs/docker/ -type f -name "dockerd.log-*" -mtime +29 -ctime +29 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up kubernetes logs
function clean_logs_kubernetes() {
    log_used_info
    log "Start deleting logs of kubernetes."
    #First refresh the symlink targets under the log directory with chmod 644 (this updates their Change Time to now), so they are not matched and deleted by the find command below
    find /apps/logs/kubernetes/ -type l 2>/dev/null | xargs -I {} readlink -f {} | xargs -I {} chmod 644 {}
    find /apps/logs/kubernetes/ -type f -mtime +29 -ctime +29 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1

    #First refresh the symlink targets under the log directory with chmod 644 (this updates their Change Time to now), so they are not matched and deleted by the find command below
    find /apps/logs/kubelet/ -type l 2>/dev/null | xargs -I {} readlink -f {} | xargs -I {} chmod 644 {}
    find /apps/logs/kubelet/ -type f -mtime +20 -ctime +20 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up smart-agent logs
function clean_logs_smart_agent() {
    log_used_info
    log "Start deleting logs of smart-agent."
    find /apps/logs/smart-agent/ -type f -name "*.log.[0-9]*" -mtime +6 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up vfilebeat logs
function clean_logs_vfilebeat() {
    log_used_info
    log "Start deleting logs of vfilebeat."
    find /apps/logs/vfilebeat/ -type f -name "vfilebeat*.log.*" -mtime +6 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up osp proxy logs
function clean_logs_osp_proxy() {
    log_used_info
    LOGS_GC_DAYS=$1
    [[ $LOGS_GC_DAYS -lt 1 ]] && LOGS_GC_DAYS=1
    let LOGS_GC_DAYS=$[ LOGS_GC_DAYS - 1 ]
    log "Start deleting logs of osp proxy."
    find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name "*.log" -mtime +${LOGS_GC_DAYS} ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Clean up coredump files
function clean_logs_coredump() {
    log_used_info
    log "Start deleting logs of coredump."
    find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name "core-*-*-*" -mtime +1 ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
}

#Delete application logs older than a given number of days
function clean_logs_receiver_by_day() {
    log_used_info
    LOGS_GC_PATH=$1
    LOGS_GC_DAYS=$2
    [[ $LOGS_GC_DAYS -lt 1 ]] && LOGS_GC_DAYS=1
    let LOGS_GC_DAYS=$[ LOGS_GC_DAYS - 1 ]
    if echo $LOGS_GC_PATH | grep "/apps/logs/log_receiver" >/dev/null 2>&1
    then
        log "Start deleting old log files in ${LOGS_GC_PATH} (-mtime +${LOGS_GC_DAYS})"
        find ${LOGS_GC_PATH} -type f -mtime +${LOGS_GC_DAYS} ! -name "*.pid" ! -name "osp-osp.out" ! -name "hs_err*.log" ! -name "osp*_class.his" ! -name "java_error*.log" ! -name "gc-*.log" ! -name "catalina.out*" ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1

        log "Start deleting old log files in ${LOGS_GC_PATH} (-mtime +${LOGS_GC_DAYS} -size +50M)"
        find ${LOGS_GC_PATH} -type f -mtime +${LOGS_GC_DAYS} \( -name "osp-osp.out" -or -name "catalina.out*" \) -size +50M ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1

        log "Start deleting old log files in ${LOGS_GC_PATH} (-mtime +1 -size +3G)"
        find ${LOGS_GC_PATH} -type f -mtime +1               \( -name "osp-osp.out" -or -name "catalina.out*" \) -size +3G  ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
    fi
}

#Delete application logs older than a given number of minutes
function clean_logs_receiver_by_minute() {
    log_used_info
    LOGS_GC_PATH=$1
    LOGS_GC_MINUTES=$2
    if echo $LOGS_GC_PATH | grep "/apps/logs/log_receiver" >/dev/null 2>&1
    then
        log "Start deleting old log files in ${LOGS_GC_PATH} (-mmin +${LOGS_GC_MINUTES})"
        find ${LOGS_GC_PATH} -type f -mmin +${LOGS_GC_MINUTES} ! -name "*.pid" ! -name "osp-osp.out" ! -name "hs_err*.log" ! -name "osp*_class.his" ! -name "java_error*.log" ! -name "gc-*.log" ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
    fi
}

#Get the usage rate of the root partition
function get_used_rate() {
    USED_RATE=`df -l 2>/dev/null | awk '$NF == "/" {print $5*1}'`
    if [[ -n $USED_RATE ]]
    then
        echo $USED_RATE
    else
        log "ERROR: Failed to get used rate of root partition."
        exit 1
    fi
}

#Get the pod directory with the largest log volume
function get_target_log_dir() {
    TARGET_LOG_DIR=`du -s ${LOGS_ROOT_DIR}/*/*/ /apps/logs/osp/ 2>/dev/null | sort -rn | head -n 1 | awk '{print $2}'`
    if [[ -n $TARGET_LOG_DIR ]]
    then
        echo $TARGET_LOG_DIR
    else
        log "ERROR: Failed to get target log dir."
        exit 1
    fi
}

function clean_logs_unimportant() {
    log_used_info

    #Clean up trace logs
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        find ${LOGS_ROOT_DIR}/*/*/trace/logs/ -type f -name "trace-log.out*" ! -name "trace-log.out" ! -name "trace-log.out.1" ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
        USED_RATE=`get_used_rate`
    fi

    #Truncate osp-osp.out and catalina.out files larger than 1GB
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        while read file_path
        do
            if [[ $USED_RATE -ge $TARGET_RATE ]]
            then
                log_truncate $file_path
            else
                break
            fi
            USED_RATE=`get_used_rate`
        done< <(find $LOGS_ROOT_DIR -type f \( -name "osp-osp.out" -or -name "catalina.out*" \) -size +1G -exec ls -l {} \; 2>/dev/null | sort -rn -k5 | awk '{print $NF}')
    fi

    #Keep at most one coredump file in each pod directory
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        while read pod_dir
        do
            while read file_path
            do
                rm_pod_file ${file_path}
            done< <(find ${pod_dir} -maxdepth 1 -type f -name "core-*-*-*" -exec ls -l --time-style="+%s" {} \; 2>/dev/null | sort -n -k6 | sed '$d' | awk '{print $NF}')
        done< <(find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name "core-*-*-*" 2>/dev/null | sed "s|/[^/]*$||" | sort | uniq)
        USED_RATE=`get_used_rate`
    fi

    #Keep only the 4 most recent coredump files on each host
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        while read file_path
        do
            rm_pod_file ${file_path}
        done< <(find ${LOGS_ROOT_DIR}/*/*/ -maxdepth 1 -type f -name "core-*-*-*" -exec ls -l --time-style="+%s" {} \; 2>/dev/null | sort -rn -k6 | sed '1,4d' | awk '{print $NF}')
        USED_RATE=`get_used_rate`
    fi

    #Delete large osp proxy logs
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name "*.log" -mtime +1 -size +10G ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
        find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name "*.log" -mtime +0 -size +20G ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
        USED_RATE=`get_used_rate`
    fi

    #Delete access logs that have not been modified in the last 2 days and are not currently opened by any process
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        find ${LOGS_ROOT_DIR}/*/*/ -type f -mtime +1 -name "*access*.log" ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
        USED_RATE=`get_used_rate`
    fi

    #Truncate osp proxy logs larger than 70GB
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        while read file_path
        do
            log_truncate ${file_path}
        done< <(find /apps/logs/log_receiver/noah-osp-proxy-local.vip.vip.com/ -type f -name "*.log" -size +70G 2>/dev/null)
        USED_RATE=`get_used_rate`
    fi
}

#Delete application logs day by day
function log_gc_by_day() {
    log_used_info

    USED_RATE=`get_used_rate`
    let APP_LOGS_GC_DAYS=$[ APP_LOGS_GC_DAYS -1 ]

    #Delete the oldest application logs of all pods day by day, until root partition usage drops below TARGET_RATE or only the last 2 days of logs remain
    while [[ $USED_RATE -ge $TARGET_RATE ]] && [[ $APP_LOGS_GC_DAYS -ge 2 ]]
    do
        clean_logs_receiver_by_day $LOGS_ROOT_DIR $APP_LOGS_GC_DAYS
        let APP_LOGS_GC_DAYS=$[ APP_LOGS_GC_DAYS -1 ]
        USED_RATE=`get_used_rate`
    done

    #Repeatedly find the pod log directory with the largest log volume and delete its oldest application logs day by day, until root partition usage drops below TARGET_RATE or only the last day of logs remains
    declare -A PODS_ARRAY
    PROXY_GC_DAYS=2

    while [[ $USED_RATE -ge $TARGET_RATE ]]
    do
        TARGET_LOG_DIR=`get_target_log_dir`
        if [[ $TARGET_LOG_DIR != "/apps/logs/osp/" ]]
        then
            TARGET_KEY=`echo $TARGET_LOG_DIR | awk -F/ '{print $(NF-2)"_"$(NF-1)}'`

            if [[ -z ${PODS_ARRAY[$TARGET_KEY]} ]]
            then
                PODS_ARRAY[$TARGET_KEY]=$APP_LOGS_GC_DAYS
            fi

            if [[ ${PODS_ARRAY[$TARGET_KEY]} -gt 0 ]]
            then
                clean_logs_receiver_by_day $TARGET_LOG_DIR ${PODS_ARRAY[$TARGET_KEY]}
                PODS_ARRAY[$TARGET_KEY]=`expr ${PODS_ARRAY[$TARGET_KEY]} - 1`
            else
                break
            fi
        else
            if [[ $PROXY_GC_DAYS -ge 0 ]]
            then
                clean_logs_osp_proxy $PROXY_GC_DAYS
                let PROXY_GC_DAYS=$[ PROXY_GC_DAYS - 1 ]
            else
                break
            fi
        fi

        sleep 1
        USED_RATE=`get_used_rate`
    done
}

#Repeatedly find the pod directory with the largest log volume and delete files in it that have not been modified recently and are not opened by any process, ordered by last modification time, until root partition usage drops below TARGET_RATE
function log_gc_by_minute() {
    log_used_info
    USED_RATE=`get_used_rate`
    APP_LOGS_GC_HOUR=21

    while [[ $USED_RATE -ge $TARGET_RATE ]]
    do
        TARGET_LOG_DIR=`get_target_log_dir`
        if [[ $TARGET_LOG_DIR != "/apps/logs/osp/" ]]
        then
            if [[ $APP_LOGS_GC_HOUR -ge 1 ]]
            then
                let APP_LOGS_GC_MINUTE=$[ APP_LOGS_GC_HOUR * 60 ]
                clean_logs_receiver_by_minute $TARGET_LOG_DIR $APP_LOGS_GC_MINUTE
                let APP_LOGS_GC_HOUR=$[ APP_LOGS_GC_HOUR - 4 ]
            else
                break
            fi
        else
            break
        fi
        sleep 1
        USED_RATE=`get_used_rate`
    done

    check_vfilebeat_agent_fd

    APP_LOGS_GC_MINUTE=45
    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        log "systemctl stop vip-vfilebeat-agent"
        systemctl stop vip-vfilebeat-agent

        while [[ $USED_RATE -ge $TARGET_RATE ]]
        do
            TARGET_LOG_DIR=`get_target_log_dir`
            if [[ $TARGET_LOG_DIR != "/apps/logs/osp/" ]]
            then
                if [[ $APP_LOGS_GC_MINUTE -ge 5 ]]
                then
                    clean_logs_receiver_by_minute $TARGET_LOG_DIR $APP_LOGS_GC_MINUTE
                    let APP_LOGS_GC_MINUTE=$[ APP_LOGS_GC_MINUTE - 20 ]
                else
                    break
                fi
            else
                break
            fi
            sleep 1
            USED_RATE=`get_used_rate`
        done

        log "systemctl start vip-vfilebeat-agent"
        systemctl start vip-vfilebeat-agent
    fi
}

#Get the file size in GB (returns 0 if the file is smaller than 1GB)
function get_file_size() {
    FILE_PATH=$1
    FILE_SIZE_BYTES=`ls -l $FILE_PATH 2>/dev/null | awk '{print $5}'`
    [[ -z $FILE_SIZE_BYTES ]] && FILE_SIZE_BYTES=0
    FILE_SIZE_GB=`echo "$FILE_SIZE_BYTES/1073741824" | bc`
    echo $FILE_SIZE_GB
}

#Shrink large files step by step with the truncate command (avoids the IO spike caused by emptying a huge file in one go)
function log_truncate() {
    FILE_PATH=$1
    FILE_SIZE=`get_file_size $FILE_PATH`
    TARGET_SIZE=3
    while [[ $TARGET_SIZE -ne 0 ]]
    do
        let TARGET_SIZE=$[ FILE_SIZE - 5 ]
        [[ $TARGET_SIZE -lt 0 ]] && TARGET_SIZE=0

        log "Truncate file: ${FILE_PATH} (${FILE_SIZE}GB -> ${TARGET_SIZE}GB)"
        truncate -s ${TARGET_SIZE}G $FILE_PATH

        #If the file has been emptied and is not opened by any process, delete it
        if [[ $TARGET_SIZE -eq 0 ]]
        then
            find ${FILE_PATH} ! -exec fuser -s "{}" 2>/dev/null \; -exec echo {} \; -delete | xargs -I {} echo "$(date +'[%F %T]') Delete file:" {} >>$LOG_FILE 2>&1
        fi

        sleep 1
        FILE_SIZE=`get_file_size $FILE_PATH`
    done
}

#Sort files by size and truncate the largest ones in turn until root partition usage drops below TARGET_RATE
function log_gc_by_file() {
    LOOP=0
    while read file_path
    do
        USED_RATE=`get_used_rate`
        if [[ $USED_RATE -ge $TARGET_RATE ]]
        then
            log_truncate $file_path 
        else
            break
        fi
        let LOOP++
        [[ $LOOP -ge 50 ]] && break
    done< <(find /apps/logs/ -type f -size +1G -exec ls -l {} \; 2>/dev/null | sort -rn -k5 | awk '{print $NF}')
}

function check_vfilebeat_agent_fd() {
    VFILEBEAT_AGENT_PIDS=$(pidof /apps/svr/vfilebeat/vfilebeat 2>/dev/null)
    if [[ -n $VFILEBEAT_AGENT_PIDS ]]
    then
        #Build the list of fd directories of all vfilebeat processes (also works when there is only one PID)
        VFILEBEAT_FD_DIRS=$(printf '/proc/%s/fd ' $VFILEBEAT_AGENT_PIDS)
        DELETED_FD_NUM=$(ls -l $VFILEBEAT_FD_DIRS 2>/dev/null | grep -c deleted)
        if [[ $DELETED_FD_NUM -ge 3 ]]
        then
            DELETED_FILES=$(ls -l $VFILEBEAT_FD_DIRS 2>/dev/null | grep deleted | awk '{print $(NF-1)}' | tr '\n' '|' | sed 's/|$//')
            log "vfilebeat unreleased file descriptor: $DELETED_FD_NUM"
            log "vfilebeat unreleased file path: $DELETED_FILES"
            log "systemctl restart vip-vfilebeat-agent"
            systemctl restart vip-vfilebeat-agent
        fi
    fi
}

log "===================================================================================================="

# Run the full cleanup once a day at 3 o'clock in the morning
if [[ $CURRENT_H -eq 3 ]] && [[ ! -e $DAY_LOCK_FILE ]]
then
    touch $DAY_LOCK_FILE
    if [[ $? -eq 0 ]]
    then
        check_vfilebeat_agent_fd
        clean_logs_sys
        clean_logs_cron
        clean_logs_openvswitch
        clean_logs_netplugin
        clean_logs_docker
        clean_logs_kubernetes
        clean_logs_smart_agent
        clean_logs_vfilebeat
        clean_logs_osp_proxy 3
        clean_logs_receiver_by_day $LOGS_ROOT_DIR $APP_LOGS_GC_DAYS
        clean_logs_dir_pod
        clean_tmp_data_dir_pod
        clean_logs_coredump
        clean_tmp_dir_spark
        clean_tmp_dir_ai
        check_vfilebeat_agent_fd
    fi
fi

USED_RATE=`get_used_rate`

if [[ $USED_RATE -ge $GC_RATE ]]
then
    check_vfilebeat_agent_fd
    clean_tmp_dir_spark
    clean_tmp_dir_ai
    clean_logs_unimportant
    # Clean up temporary data directories of destroyed containers
    clean_tmp_data_dir_pod
    USED_RATE=`get_used_rate`

    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        log_gc_by_day
        USED_RATE=`get_used_rate`
    fi
    check_vfilebeat_agent_fd

    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        log_gc_by_minute
        USED_RATE=`get_used_rate`
    fi
    check_vfilebeat_agent_fd

    if [[ $USED_RATE -ge $TARGET_RATE ]]
    then
        log_gc_by_file
    fi
    check_vfilebeat_agent_fd
fi

if [[ -f $HAS_LARGE_LOG_FILES ]]
then
    log "Start deleting log files larger than 80GB."
    while read FILE_ORIGINAL_PATH
    do
        FILE_REAL_PATH=`readlink -f "$FILE_ORIGINAL_PATH"`
        if echo "${FILE_REAL_PATH}" | egrep "^/apps/logs/" >/dev/null 2>&1
        then
            log_truncate "${FILE_REAL_PATH}"
        else
            log "[DENIED] Failed to delete file: ${FILE_ORIGINAL_PATH}"
        fi
    done< <(find /apps/logs/ -type f -size +80G 2>/dev/null)
    rm -f $HAS_LARGE_LOG_FILES
    check_vfilebeat_agent_fd
fi

log_used_info
log "Done! (Cost: $(get_cost_seconds)s)"
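
The cleanup script is meant to be driven by cron: it refuses to run during minutes 0-5 and 56-59 of every hour, and the per-day lock file under /apps/logs/cron limits the heavy 3 o'clock pass to one run per day. A minimal scheduling sketch follows; the install path /apps/sbin/clean_logs.sh and the 10-minute interval are assumptions, not part of the script itself.

# /etc/cron.d/clean_logs -- hypothetical schedule for the cleanup script above;
# a frequent schedule is safe because the script enforces its own minute window,
# process lock and day lock.
*/10 * * * * root /apps/sbin/clean_logs.sh >/dev/null 2>&1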

xcall script

#!/bin/bash

# Get the command passed on the command line

cmd=$*

# Check whether the command is empty
if [ ! -n "$cmd" ]
then
        echo "command cannot be null!"
        exit
fi

# Get the current login user
user=`whoami`

# Run the command on each worker node; adjust the loop to your own cluster, the host numbers must match your actual hostnames (same below)
for (( host=1;host<=3;host++ ))
do
        echo "================current host is linux0$host================="
        echo "--> execute command \"$cmd\""
        ssh $user@linux0$host  "source /etc/profile; $cmd"
done

echo "executed successfully!"
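
A minimal usage sketch, assuming the script is saved as xcall.sh (the file name is an assumption) and passwordless SSH from the current user to linux01-linux03 is already configured:

chmod +x xcall.sh
# Run jps on linux01, linux02 and linux03 in turn
./xcall.sh jps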

xsync file sync script

#!/bin/bash
#1 Get the number of input arguments; exit immediately if there are none
pcount=$#
if ((pcount==0)); then
    echo "no args"
    exit
fi

#2 Get the file name
p1=$1
fname=`basename $p1`
echo fname=$fname

#3 Get the absolute path of the parent directory
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir

#4 Get the current user name
user=`whoami`

#5 Loop over the hosts; set host according to your own node count and hostnames
for((host=1; host<4; host++)); do
        #echo $pdir/$fname $user@linux0$host:$pdir
        echo --------------- linux0$host ----------------
        rsync -rvl $pdir/$fname $user@linux0$host:$pdir
done