# 要求:表以日期为分区,分区格式为 dt='yyyy-MM-dd'
#!/bin/bash
#
# Purges expired daily partitions from every ods_* table of a Hive database.
#
# Tables are expected to be partitioned by a single `dt` column whose
# partition specs look like dt=yyyy-MM-dd. For every partition strictly older
# than the retention window the script first drops the partition from the
# metastore and then removes <table location>/dt=<date> from HDFS.
#
# Usage: <script> [retention_days] [database]
#   retention_days  number of days of data to keep (default: 7)
#   database        Hive database to scan (default: jinpeng)
#
# Exit status: 0 on success, 2 on invalid arguments, 1 if any Hive/HDFS step
# failed.
#
# Requires: hive, hdfs and GNU date (for `-d`) on PATH.

# No `set -e`: every Hive/HDFS failure is handled explicitly so that one broken
# table does not stop the clean-up of the remaining ones.
set -uo pipefail

# Writes a diagnostic message to stderr.
log_error() {
  printf 'ERROR: %s\n' "$*" >&2
}

#######################################
# Prints the storage location of a table.
# Arguments: $1 - qualified table name (database.table)
# Outputs:   the location on stdout (empty if no Location row was found)
# Returns:   non-zero if the hive call failed
#######################################
get_table_location() {
  local qualified_table=$1
  local description

  description=$(hive -e "describe formatted ${qualified_table}") || return 1
  # The row looks like "Location:   <tab>hdfs://...<tab>NULL"; splitting on any
  # whitespace and taking the second field ignores padding and trailing columns.
  awk '$1 == "Location:" { print $2; exit }' <<<"${description}"
}

#######################################
# Drops every expired dt partition of one table and deletes its data.
# Arguments: $1 - database, $2 - table, $3 - cutoff date (yyyy-MM-dd);
#            partitions strictly older than the cutoff are removed
# Outputs:   one progress line per deleted partition on stdout
# Returns:   0 on success, 1 if any step failed
#######################################
purge_table() {
  local database=$1
  local table=$2
  local cutoff=$3
  local partition_re='^[[:space:]]*dt=([0-9]{4}-[0-9]{2}-[0-9]{2})[[:space:]]*$'
  local listing line partition_dt table_location
  local status=0
  local -a lines=() expired=()

  if ! listing=$(hive -e "show partitions ${database}.${table}"); then
    log_error "cannot list partitions of ${database}.${table}"
    return 1
  fi

  # Keep only well-formed dt=yyyy-MM-dd specs; header lines, default partitions
  # and multi-level specs are skipped instead of being compared as dates.
  mapfile -t lines <<<"${listing}"
  for line in "${lines[@]}"; do
    [[ "${line}" =~ ${partition_re} ]] || continue
    partition_dt=${BASH_REMATCH[1]}
    # Fixed-width ISO dates order correctly under string comparison.
    if [[ "${partition_dt}" < "${cutoff}" ]]; then
      expired+=("${partition_dt}")
    fi
  done
  (( ${#expired[@]} > 0 )) || return 0

  # Resolve the location before touching the metastore: without it the data
  # of an external table could not be cleaned up after the partition is gone.
  if ! table_location=$(get_table_location "${database}.${table}") \
    || [[ -z "${table_location}" ]]; then
    log_error "cannot determine location of ${database}.${table}; skipping it"
    return 1
  fi
  table_location=${table_location%/}

  for partition_dt in "${expired[@]}"; do
    echo "Deleting table ${database}.${table} partition dt=${partition_dt}..."

    # Drop the metadata first. The partition value must be a quoted string.
    if ! hive -e "ALTER TABLE ${database}.${table} DROP IF EXISTS PARTITION (dt='${partition_dt}');"; then
      log_error "failed to drop ${database}.${table} partition dt=${partition_dt}; data left untouched"
      status=1
      continue
    fi

    # Delete the data. For managed tables Hive has already removed the
    # directory, so -f keeps a missing path from being reported as an error.
    # NOTE: assumes the default <table location>/dt=<date> layout; partitions
    # added with a custom LOCATION are not covered.
    if ! hdfs dfs -rm -r -f "${table_location}/dt=${partition_dt}"; then
      log_error "failed to delete ${table_location}/dt=${partition_dt}"
      status=1
    fi
  done

  return "${status}"
}

#######################################
# Entry point: purges expired partitions of all ods_* tables.
# Arguments: $1 - retention in days (optional, default 7)
#            $2 - Hive database (optional, default jinpeng)
# Returns:   0 on success, 2 on invalid arguments, 1 on any failure
#######################################
main() {
  local retention_days=${1:-7}
  local database=${2:-jinpeng}
  local table_re='^[[:space:]]*(ods_[A-Za-z0-9_]*)[[:space:]]*$'
  local cutoff listing line
  local status=0
  local -a lines=()

  # Both values end up inside commands/HiveQL, so only accept plain tokens.
  if [[ ! "${retention_days}" =~ ^[0-9]+$ ]]; then
    log_error "retention_days must be a non-negative integer: ${retention_days}"
    return 2
  fi
  if [[ ! "${database}" =~ ^[A-Za-z0-9_]+$ ]]; then
    log_error "invalid database name: ${database}"
    return 2
  fi

  # Compute the cutoff date.
  if ! cutoff=$(date +%Y-%m-%d -d "-${retention_days} day"); then
    log_error "cannot compute the cutoff date"
    return 1
  fi

  # Fetch all tables and keep those whose name starts with ods_.
  if ! listing=$(hive -e "show tables in ${database}"); then
    log_error "cannot list tables of ${database}"
    return 1
  fi

  mapfile -t lines <<<"${listing}"
  for line in "${lines[@]}"; do
    [[ "${line}" =~ ${table_re} ]] || continue
    purge_table "${database}" "${BASH_REMATCH[1]}" "${cutoff}" || status=1
  done

  return "${status}"
}

main "$@"