#!/bin/bash
###################################################################
### 功能:批量加载sql文件到holo库(整库加载)
### 执行:sh bulk_load_files_holo.sh /data/datadd/
### 插入目标表:库名_表名
####################################################################
path=$(cd $(dirname $0);pwd)
#file_path=$1
echo "$(date "+%Y-%m-%d %H:%M:%S") 参数 ===>> path:${file_path} " > ${path}/log.log
## 存放新文件目录
dest_path=${path}/data
if [ ! -d ${dest_path} ];then
mkdir -p ${dest_path}
else
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}目录已经存在" >> ${path}/log.log
fi
dealContentAndSplit(){
PATH_FILE=$1
#获取库名称表明
db_table_name=$(echo ${PATH_FILE}|cut -d '.' -f 1,2)
table_name=$(echo ${PATH_FILE}|cut -d '.' -f 2)
new_table_name=ods.$(echo ${db_table_name} | sed 's/\./_/g')
echo "$(date "+%Y-%m-%d %H:%M:%S") -- PATH_FILE:${PATH_FILE} --- table_name:${table_name} --new_table_name:${new_table_name} " >> ${path}/log.log
#split 拆分文件 50w行 拆分一个文件(跟进实际情况输入行数)
#split -l1000 -d sellermanage.t_sps_channel.sql sellermanage.t_sps_channel.sql_
#拆分文件过程
split_file=`echo ${PATH_FILE} | cut -d \. -f 2`
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 创建存放拆分文件目录 -- ${dest_path}/${split_file} " >> ${path}/log.log
if [ ! -d ${dest_path}/${split_file} ];then
mkdir -p ${dest_path}/${split_file}
else
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}/${split_file} 已经存在" >> ${path}/log.log
fi
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 拆分文件开始 -- ${PATH_FILE} " >> ${path}/log.log
split -l 80000 -d ${PATH_FILE} ${dest_path}/${split_file}/${split_file}_
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 拆分文件完成 -- ${PATH_FILE} " >> ${path}/log.log
#获取文件下的所有文件
dir=$(ls -l ${dest_path}/${split_file} |awk '/^-/ {print $NF}')
for ifile in $dir
do
echo "$(date "+%Y-%m-%d %H:%M:%S") 开始处理分割后文件 ${dest_path}/${split_file}/${ifile} " >> ${path}/log.log
#判断文件编码
filecode=`file ${dest_path}/${split_file}/${ifile}`
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}/${split_file}/${ifile}编码:${filecode} "
if [[ ${filecode} != *UTF-8* ]] ; then
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}/${split_file}/${ifile}不是utf8,转码........." >> ${path}/log.log
iconv -f GBK -t UTF-8 ${dest_path}/${split_file}/${ifile} -o ${dest_path}/${split_file}/${ifile}_utf8
if [ -f ${dest_path}/${split_file}/${ifile} ]; then
rm -rf ${dest_path}/${split_file}/${ifile}
mv ${dest_path}/${split_file}/${ifile}_utf8 ${dest_path}/${split_file}/${ifile}
fi
fi
#文件内容替换
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 开始替换 ${dest_path}/${split_file}/${ifile} " >> ${path}/log.log
sed -e"s/\"/'/g" -e"s/\\\'/\\\\\"/g" -e's/`//g' -e"s/);/)on conflict do nothing;/" -e"s/\<${table_name}\>/${new_table_name}/g" -i ${dest_path}/${split_file}/${ifile}
sed -e "s/'</E'</g" -i ${dest_path}/${split_file}/${ifile}
## 数据中包含"\\"特殊字符串,经过上面的sed替换后,变成了'\\",需要把后面一个双引号替换成单引号
sed -e "s/\\\"/\\'/g" -i ${dest_path}/${split_file}/${ifile}
#判断文件第一行是否INSERT;
fileN="${dest_path}/${split_file}/${ifile}"
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${fileN}" >> ${path}/log.log
insert_str=$(echo "head -n +1 ${fileN}"|awk '{run=$0;system(run)}'|cut -d ' ' -f 1)
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${insert_str} " >> ${path}/log.log
if [[ ${insert_str} == 'INSERT' ]] || [[ ${insert_str} == '/*!40101' ]] ; then
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}/${split_file}/${ifile} 文件已经有insert 或者是 /*! 注释内容" >> ${path}/log.log
else
sed "1i INSERT INTO ${new_table_name} VALUES " -i ${dest_path}/${split_file}/${ifile}
fi
#最后一行增加on conflict do nothing;
list_n=$(echo "tail -n -1 ${fileN}"|awk '{run=$0;system(run)}'|awk '{print $NF}')
final=${list_n: -1} #文件是否是一个完成的INSERT sql语句(,;)区分
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${list_n} =========== ${final}" >> ${path}/log.log
#文件中分割后的逗号删除
if [ ${final} == ',' ] ; then
sed -e'$s/.$//' -i ${dest_path}/${split_file}/${ifile}
#删除后紧接接着增加nothing
sed '$a on conflict do nothing;' -i ${dest_path}/${split_file}/${ifile}
fi
#判断是否增加nothing
if [[ ${list_n} == 'nothing;' ]] || [[ ${final} == ',' ]] ; then
echo "$(date "+%Y-%m-%d %H:%M:%S") -- ${dest_path}/${split_file}/${ifile}文件已经有nothing; 或者最后 字符串异常" >> ${path}/log.log
else
sed '$a on conflict do nothing;' -i ${dest_path}/${split_file}/${ifile}
fi
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 结束替换 ${dest_path}/${split_file}/${ifile}" >> ${path}/log.log
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 开始插入文件数据到holo:${dest_path}/${split_file}/${ifile} " >> ${path}/log.log
psql -h hgprecn-cn-nif1voafm004-cn-shanghai-vpc.hologres.aliyuncs.com -p80 -ULTAI4G1HNWdobzPXkfjCGkbR -d rtdws -f ${dest_path}/${split_file}/${ifile}
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 完成插入文件数据到holo:${dest_path}/${split_file}/${ifile} " >> ${path}/log.log
echo "$(date "+%Y-%m-%d %H:%M:%S") 结束处理分割后文件 ${dest_path}/${split_file}/${ifile} " >> ${path}/log.log
done
}
if [ "$#" -eq 1 ]; then
## 两个参数:文件路径、库名
file_path=$1
#database=$2
echo "$(date "+%Y-%m-%d %H:%M:%S") -- file_path:${file_path} ----- dest_path:${dest_path} -- ${file_path}log.log " >> ${path}/log.log
export PGPASSWORD=SbmghUcbaHg
dealContentAndSplit ${file_path}
else
echo "$(date "+%Y-%m-%d %H:%M:%S") -- 参数输入有误,请重新输入参数 ===>>【1】:file_path " >> ${path}/log.log
exit 1
fi