#!/bin/bash
## ********************************************
## Name:        data_backup.sh
## Description: Back up incremental partition data from copy.db and new.db
##              into bak.db; increments keep the most recent 2 days of data.
## Author:      ypg
## Created:
## Modified:
## Version:     V1.0
## Usage:       sh data_backup.sh backup_tables.txt 30 20201101
## Arguments:   $1: config file (table list), same directory as the script
##              $2: degree of parallelism
##              $3: etl_date; defaults to 2 days ago when omitted
## ********************************************
# Load the runtime environment (Hadoop client settings come from hd_source)
source ~/.bash_profile
hd_source
local_path=/shell/loaddata
copy_path=/user/hive/warehouse/copy.db
new_path=/user/hive/warehouse/new.db
back_path=/user/hive/warehouse/bak.db
# Default backup date: 2 days ago (note: the original comment said "1 day
# ago" but the code has always used "2 day ago"); $3 overrides it.
one_day_ago=$(date -d "2 day ago" +%Y%m%d)
if [ -n "$3" ]; then
one_day_ago=$3
fi
echo "$(date +"%Y-%m-%d %H:%M:%S")"" | 开始备份${one_day_ago}的数据。"
# User-tunable concurrency level: number of parallel backup jobs
THREAD=$2
# FIFO used as a counting semaphore; named after this PID
TMPFILE=$$.fifo
# Open the FIFO read/write on fd 7, then unlink it — the open fd keeps it alive
mkfifo ${TMPFILE}
exec 7<>${TMPFILE}
rm -f ${TMPFILE}
# Seed the semaphore with THREAD tokens (one line per available job slot)
for (( i = 0; i < ${THREAD}; i++ ))
do
echo >&7
done
#######################################
# Back up one table's partition for ${one_day_ago}: merge the partition's
# small files into a single local file, upload it into bak.db, then remove
# the source partition. copy.db is tried first, falling back to new.db.
# The upload is retried once after 20s on failure.
# Globals (read): copy_path, new_path, back_path, one_day_ago
# Arguments:      $1 - table name (falls back to the global tableName,
#                      which the original implementation relied on)
# Returns:        0 on success or when no source partition exists,
#                 1 when both upload attempts fail
#######################################
function data_bak_fun(){
local tableName=${1:-${tableName}}
local src_path
# Pick the source database: prefer copy.db's partition, else new.db's;
# nothing to do when neither exists.
if hdfs dfs -test -d ${copy_path}/${tableName}/etl_date=${one_day_ago}; then
src_path=${copy_path}
elif hdfs dfs -test -d ${new_path}/${tableName}/etl_date=${one_day_ago}; then
src_path=${new_path}
else
return 0
fi
# Create the destination partition directory in bak.db if it is absent
hadoop fs -test -d ${back_path}/${tableName}/etl_date=${one_day_ago} || hdfs dfs -mkdir -p ${back_path}/${tableName}/etl_date=${one_day_ago}
# Merge the partition's files into one local file (small-file compaction)
hdfs dfs -getmerge ${src_path}/${tableName}/etl_date=${one_day_ago}/* /data/tablebak/${tableName}.txt
# Upload, retrying once after 20s (replaces the duplicated retry branches
# of the original — behavior is unchanged)
local attempt
for attempt in 1 2; do
hdfs dfs -put -f /data/tablebak/${tableName}.txt ${back_path}/${tableName}/etl_date=${one_day_ago}/backup-${one_day_ago}
if [ $? -eq 0 ]; then
# Upload succeeded: drop the source partition and the local temp
# files (including the hidden .crc checksum files from getmerge)
hdfs dfs -rm -r ${src_path}/${tableName}/etl_date=${one_day_ago}
echo "$(date +"%Y-%m-%d %H:%M:%S")"" | finish,${tableName} 数据备份完成"
rm -f /data/tablebak/${tableName}.txt
rm -f /data/tablebak/.${tableName}.*
return 0
fi
[ ${attempt} -eq 1 ] && sleep 20s
done
echo "$(date +"%Y-%m-%d %H:%M:%S")"" | ERROR,${tableName} 数据备份失败"
return 1
}
# Iterate over the table list, running at most THREAD backups in parallel.
# (while read replaces the original `for x in $(cat …)` word-splitting loop;
# the list file holds one table name per line.)
while read -r tableName; do
[ -n "${tableName}" ] || continue
read -u7              # acquire a semaphore token (blocks when THREAD jobs run)
{
data_bak_fun "${tableName}"
echo >&7              # release the token
} &
done < "${local_path}/$1"
# Bug fix: wait for all background jobs to finish — the original closed the
# channel and reported completion while backups could still be running.
wait
# Close the semaphore channel
exec 7>&-
echo "$(date +"%Y-%m-%d %H:%M:%S")"" | 结束备份${one_day_ago}的数据。"
echo "finished"
# shell脚本实现每天备份hdfs增量数据
# (stray article text copied in with the script; commented out so bash does
#  not try to execute it) 最新推荐文章于 2024-07-12 10:22:10 发布