spark定时删日志脚本

引用原文 https://blog.csdn.net/mtj66/article/details/80538409#commentBox

并针对原文脚本中的问题点加以改进。

########################start ########################
#删除很简单,但是不能删除一些正在运行的程序的日志,尤其是spark streaming 的日志文件,一旦删除无法再生,以后想查日志都难.
#下面的脚本实现删除一个月之前的spark application的历史文件,记录下,大伙可以参考修改使用.
#!/bin/bash
# Delete Spark application history files older than ${days} days, but never
# those of applications still running on YARN (e.g. Spark Streaming jobs):
# once deleted, their logs cannot be regenerated.
days=30
# Cut-off date (YYYY-mm-dd): history files listed with an older date are stale.
day_01=$(date -d "-${days} day" +%Y-%m-%d)

# join_with_or ITEM...
# Prints the arguments joined by '\|' — a BRE alternation usable as a single
# grep pattern (e.g. "id1\|id2\|id3").  Prints nothing for zero arguments.
join_with_or() {
  local joined="" item
  for item in "$@"; do
    if [ -z "$joined" ]; then
      joined=$item
    else
      joined="${joined}\\|${item}"
    fi
  done
  printf '%s' "$joined"
}

# Application IDs of every job currently running on YARN.
# '|| true' keeps the script alive when yarn is unavailable or nothing runs.
running_array=($(yarn application -list 2>/dev/null | grep application_1 | awk '{print $1}' || true))
running_string=$(join_with_or "${running_array[@]}")
echo "running applications pattern: $running_string"

# Collect history files dated before $day_01 into /tmp/spark_log_del.list,
# excluding files of currently running applications.
# NOTE: the original piped through an unquoted $running_string; with no
# running application the variable is empty, grep -v got no pattern argument
# and failed, so nothing was ever selected.  Branch on emptiness instead.
if [ -n "$running_string" ]; then
  hdfs dfs -ls /user/spark/applicationHistory/ \
    | grep application_ \
    | grep -v "$running_string" \
    | awk -v day_01_tmp="$day_01" '{ if ($6 < day_01_tmp) print $8 }' > /tmp/spark_log_del.list
else
  # No running applications: nothing needs to be excluded.
  hdfs dfs -ls /user/spark/applicationHistory/ \
    | grep application_ \
    | awk -v day_01_tmp="$day_01" '{ if ($6 < day_01_tmp) print $8 }' > /tmp/spark_log_del.list
fi

# Dry run: only log what would be removed.  Uncomment the hdfs line to
# actually delete.
while IFS= read -r line; do
  echo " $(date +%F\ %T) to delete: $line "
  # hdfs dfs -rm -r "$line"
done < /tmp/spark_log_del.list
exit 0

#=========== 下面这段是原文的另一种写法，因 awk 条件写错（用了 lt 而非 <）有时不好用 ===========

# Alternative implementation kept for reference (unreachable after the
# 'exit 0' above).  The original awk condition was '$6 lt day_01' — 'lt' is
# NOT an awk operator; awk parses it as string concatenation, which is always
# truthy, so every file was listed regardless of its date.  The correct
# comparison operator is '<'.
history_logs_to_delete=($(hdfs dfs -ls /user/spark/applicationHistory/ \
  | grep application_ \
  | grep -v "$running_string" \
  | awk -v day_01="$day_01" '{ if ($6 < day_01) print $8 }'))
for ((j = 0; j < ${#history_logs_to_delete[@]}; j++)); do
  history_logs_str=${history_logs_to_delete[$j]}
  echo " $(date +%F\ %T) to delete $j ==$history_logs_str " >> history_logs_to_delete.log
  # hdfs dfs -rm -r "$history_logs_str"
done
########################## end #################################

 

展开阅读全文

没有更多推荐了,返回首页