Deleting backup files on Linux: pruning backups outside a given date window

The shell script below keeps bloom-filter backups on a sliding retention window: it computes the last day of each recent month, uploads the newest local filter directory to HDFS, then deletes local and HDFS copies that fall outside the 7-day and 30-day limits, keeping month-end snapshots.

#!/usr/bin/env bash

source /etc/profile

echo " *************** start filter *************** "

# Earlier, non-loop version: get the last day of each of the previous six months
# (superseded by the array loop below).
#m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m0}
#m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1}
#m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)
#echo ${m2}
#m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m3}
#m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m4}
#m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5}
#m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m6}

# Last day of the current month and of the 12 months before it;
# ${#m[*]} and ${#m[@]} both give the array length.
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo "m0 : ${m[0]}  month : ${#m[@]}"

for n in $(seq 0 11); do
    m[n+1]=$(date -d "$(date -d "${m[n]}" +%Y%m01) -1 day" +%Y%m%d)
    echo "m$((n+1)) : ${m[n+1]}  month : ${#m[*]}"
done
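# Sketch of the trick above (GNU date assumed): "$(date -d 'month' +%Y%m01) -1 day"
# formats the first day of *next* month, then steps back one day, which gives the
# last day of the current month. Feeding m[n] through the same expression walks one
# month further into the past per iteration. For example, run on 2016-03-15 this
# yields m0=20160331, m1=20160229, m2=20160131, and so on.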

echo " ****** time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "

max_date=0

# get the latest file and copy to hdfs

cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

for dir in $(ls -l ./ | awk '/^d/{print $NF}')

do

if [[ -d $dir && $dir == *\_* ]]; then

f_d=$(echo $dir | cut -d \_ -f 3 | cut -d . -f 1)

if [[ $max_date < $f_d ]]; then

max_date=$f_d

max_filter=$dir

fi

fi

done

echo " max date is : "$max_date

echo " max filter is : "$max_filter

pwd

# 复制最近日期的filter文件到hdfs

hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter

if [[ $? == 0 ]]; then

echo " filter is already exist : "$max_filter

else

echo " start hdfs copy "

echo " ****** start time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "

hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters

echo " ****** end time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "

fi

remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo " cutoff date for removing local serialized files : $remove_week"

remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo " cutoff date for removing local files and hadoop filters : $remove_date"

echo " *************** start remove filter *************** "

for r_dir in $(ls -l ./ | awk '/^d/{print $NF}')

do

if [[ -d $r_dir && $r_dir == *\_* ]]; then

r_d=$(echo $r_dir | cut -d \_ -f 3 | cut -d . -f 1)

if [[ $r_d < $remove_date ]]; then

if [[ ${m[*]} == *$r_d* ]]; then

cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir

pwd

for f_dir in $(ls *)

do

if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then

echo " ------ keep mau_filter is: " $f_dir;

else

echo " remove file is: " $f_dir;

rm -r $f_dir

fi

done

cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

pwd

else

echo " remove filter_dir is: "$r_dir

rm -r $r_dir

fi

elif [[ $r_d < $remove_week ]]; then

if [[ $r_d == $m0 || $r_d == $m1 || $r_d == $m2 ]]; then

cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir

pwd

for f_dir in $(ls *)

do

if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then

echo " ------ week keep mau_filter is: " $f_dir;

else

if [[ "$f_dir" == *.FILTER.SER ]]; then

echo " - last day of month - week remove file is: " $f_dir;

rm -r $f_dir

fi

fi

done

cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

pwd

else

echo "week remove filter is: "$r_dir

rm -r $r_dir/*.FILTER.SER

fi

fi

fi

done
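# For a plain age-based purge with none of the month-end exceptions above, a
# find(1) one-liner is a common alternative (sketch only, not used by this script;
# note that -mtime keys on modification time, not the date embedded in the name):
#   find /home/hadoop/streaming_run_center/tmp/checkpoint/filter -maxdepth 1 \
#       -type d -name '*_*' -mtime +30 -exec rm -r {} +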

echo " =============== start remove hdfs filter =============== "

# 删除hdfs上指定日期外的tdid

for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}')

do

if [[ $h_filter == *\_* ]]; then

h_date=$(echo $h_filter | cut -d / -f 6 | cut -d \_ -f 3 | cut -d . -f 1)

#        echo " hdfs date : "$h_date

#        echo " hdfs filter : "$h_filter

if [[ ${m[*]} == *$h_date* ]]; then

echo " remain hdfs filter is : "$h_filter

elif [[ $h_date < $remove_date ]]; then

echo "not remain date is : "$h_date

echo "remove hdfs filter is : "$h_filter

hadoop fs -rmr $h_filter

fi

fi

done
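# Note: "hadoop fs -rmr" still works but is deprecated in recent Hadoop releases;
# the modern equivalent is "hadoop fs -rm -r" (add -skipTrash to delete without
# going through the .Trash directory).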

echo " -------------- start tdid --------------- "

# 删除小于30天的tdid

cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo

for tdid in $(ls *)

do

if [[ $tdid == *\_* ]]; then

t_d=$(echo $tdid | cut -d \_ -f 2 | cut -d . -f 1)

if [[ $t_d == $max_date || $t_d > $max_date ]]; then

echo " need copy date : "$t_d

echo " need copy tdid : "$tdid

# 检查tdid是否存在

#            hadoop fs -test -e jiaojiao/tdid/$tdid

#            if [[ $? == 0 ]]; then

#                echo " tdid is already exist,remove it first "

#                hadoop fs -rm jiaojiao/tdid/$tdid

#                hadoop fs -put $tdid jiaojiao/tdid

#            else

#                echo " start copy "

#                hadoop fs -put $tdid jiaojiao/tdid

#            fi

elif [[ $t_d < $remove_date ]]; then

echo " remove tdid : "$tdid

rm $tdid

fi

fi

done

#echo " =============== start remove hdfs tdid ===============  "

#for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}')

#do

#    if [[ $h_tdid == *\_* ]]; then

#        h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d . -f 1)

#        echo $h_date

#        echo $h_tdid

#    fi

#done
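To run this cleanup unattended, a cron entry along the following lines could be used; the script path and log file here are hypothetical placeholders for wherever the script is actually saved:

# run the backup cleanup every day at 02:30 (edit with crontab -e)
30 2 * * * /home/hadoop/bin/clean_filter.sh >> /home/hadoop/logs/clean_filter.log 2>&1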
