#!/bin/bash
source ~/.bashrc
# Directory containing the Hadoop client binaries (CDH parcel layout).
HADOOP_BIN_PATH=/opt/cloudera/parcels/CDH/bin
# HDFS directory to scan for outdated date-named subdirectories.
d1=/tmp1/flink
# Load the directories to check (may be several, space-separated) into an array.
array_check=("$d1")
# Expiry cutoff as YYYYMMDD: entries named with an older date are outdated.
# NOTE: must be plain ASCII quotes — curly quotes break the date(1) call.
m1=$(date -d "-30 days" +"%Y%m%d")
#Func: log (and optionally delete) HDFS directories dated before the cutoff $m1
removeOutDate(){
  # Directory to scan; defaults to $d1 so the old hard-coded behavior is kept
  # when no argument is given.
  local dir=${1:-$d1}
  echo "$m1"
  # List child entries, keep directories only (lines starting with 'd').
  # awk splits on '/': with the trailing '//' in the path the date-named leaf
  # component lands in $5 — TODO confirm field position against actual
  # `hdfs dfs -ls` output on this cluster. Keep entries older than $m1 and
  # append the extracted date as an extra column.
  "$HADOOP_BIN_PATH"/hdfs dfs -ls "$dir"// | grep -E "^d" | awk -F '/' -v a="$m1" '$5<a {print $0," ",$5}' > tmp1dir
  echo "ls dir completed!"
  # Each line splits into: perms, replication, user, group, size, date, time,
  # full path, extracted date (the column awk appended above).
  while read -r quanxian temp user group size day hour filepath filedate; do
    echo "$filedate"
    # Sanity check: only act on names that look like a 20xx date, so stray
    # directories can never be logged/deleted by accident.
    if [[ "$filedate" == 20* ]]; then
      echo "deltime:$(date +'%Y-%m-%d %H:%M:%S') fileinfo:$filepath" >> deldirlog
      # Dry run by default — uncomment to actually delete from HDFS.
      #$HADOOP_BIN_PATH/hdfs dfs -rm -r "$filepath" > /dev/null 2>&1
    fi
  done < tmp1dir
}
#Func: drive the cleanup over every configured directory
execute(){
  echo -e "\n\n"
  echo "$(date +'%Y-%m-%d %H:%M:%S') start to remove outdate files in hdfs"
  echo "$(date +'%Y-%m-%d %H:%M:%S') today is: $(date +"%Y-%m-%d %H:%M:%S")"
  # Process each directory loaded into array_check; quoted expansion keeps
  # paths with spaces intact.
  for i in "${array_check[@]}"; do
    echo "$(date +'%Y-%m-%d %H:%M:%S') processing filepath: $i"
    removeOutDate "$i"
    echo -e "\n"
  done
  echo "$(date +'%Y-%m-%d %H:%M:%S') remove outdate files in hdfs finished"
  echo -e "\n\n"
}
# Entry point: start the cleanup run.
execute