#!/bin/bash
# Daily bulk export: Hive table -> HFiles -> HBase, one day per loop iteration.
# NOTE: bash (not plain sh) is required — this script uses arrays and [[ ]].

# Database connection (kept for reference)
#srcConnect="jdbc:sqlserver://10.2.5.231\MSSQLSERVER2014;username=dmData;password=data!23;database=GMVMonitorCenter_Test;characterEncoding=utf8"
# Destination table name
dstTabName=wp04_0013_data
checkColumn="RecoID"
condiColumn="a.[记录时间]"
# Date window to export; startRecoid is advanced one day per iteration
startRecoid="2015-10-27 00:00:00"
plus=1
endRecodid="2017-01-11 00:00:00"
echo "from ${startRecoid} to ${endRecodid}"
COUNTDAY=0
# HBase table name prefix (a yyyymmdd suffix is appended per day)
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/multisplit_biz1
# HDFS directory backing the temporary external Hive table
TemphbasePath=/user/hbase/multisplit_biz1/${hbaseTableName}Temp
# Hive database name
hiveDbName=gree_monitorcenter_biz1
# Temporary Hive table name
tempTabName=${dstTabName}Temp
# Source Hive table name
hiveTableName=wp04_0013_data
hiveTableCols="RowKey string,c1 string"
# Columns folded into the single HBase value column c1.
# TODO(review): the original read "columnsArry=(ProjID" with no closing
# parenthesis — a shell syntax error (the column list was truncated in a
# paste). Restore the full column list before running in production.
columnsArry=(ProjID)
# Build the SELECT that concatenates all columns into one comma-separated value
sql=" select RowKey,concat(RecoTime,',',"
for column in "${columnsArry[@]}"; do
  if [[ "${column}" = "ReceiveTime" ]]; then
    # ReceiveTime is intentionally blanked out in the export
    sql=${sql}"'',',',"
  else
    # NULL-safe: emit '' instead of the literal NULL
    sql=${sql}"(case when ${column} is null then '' else ${column} end),',',"
  fi
done
sql=${sql}"(case when GroupCtrlDataSign is null then '' else GroupCtrlDataSign end)) as c1 from ${hiveTableName} "
# Batch import: export one day per iteration until the end date is reached
while [[ "$startRecoid" != "$endRecodid" ]]; do
  # Next day boundary, same "Y-m-d H:M:S" format
  inter=$(date -d "1 day $startRecoid" +%Y-%m-%d\ %H:%M:%S)
  # Compare on the date part only, as epoch seconds
  temp_endRecodid=$(echo "$endRecodid" | awk '{print $1}')
  end_time_s=$(date -d "$temp_endRecodid" +%s)
  temp_inter=$(echo "$inter" | awk '{print $1}')
  inter_time_s=$(date -d "$temp_inter" +%s)
  # Clamp the final step so the loop terminates exactly on endRecodid
  if [ "$inter_time_s" -gt "$end_time_s" ]; then
    inter=$endRecodid
  fi
  # Compact date (yyyymmdd) used as the per-day table/path suffix
  temp_startRecoid=$(echo "$startRecoid" | awk '{print $1}' | tr -d "-")
  # Remove stale HFile output from any previous run of this day
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  # Drop the temporary table if it already exists
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
  hadoop fs -mkdir "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  # Recreate the temporary external table over today's staging directory
  hive -e "use $hiveDbName;\
create external table $tempTabName ($hiveTableCols) row format delimited fields terminated by '\t' location \"${TemphbasePath}${temp_startRecoid}\";"
  echo "++++++++++++++++++++开始导入hbases数据:++++++++++++++++++++++++++++++++"
  echo " insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ; "
  # Stage one day of data into the temp table, sorted by RowKey
  hive -e "\
set mapreduce.job.queuename=production; \
use $hiveDbName;\
insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ;"
  # Advance the cursor now; temp_startRecoid still names the day just staged
  startRecoid=$inter
  COUNTDAY=$((COUNTDAY + 1))
  # Skip empty days: nothing staged means nothing to bulk-load
  partf1=$(hadoop fs -du -s "${TemphbasePath}${temp_startRecoid}" | awk '{print $1 }')
  if [ "$partf1" == "0" ]; then
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
    echo "---------------file is null partf1 :$partf1-----------------------"
    continue
  fi
  # Build HFiles from the staged data for table biz1:<prefix><yyyymmdd>.
  # NOTE(review): the separator argument reads as a literal space here, while
  # the staging table is '\t'-delimited — confirm the character is really a tab.
  hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output="${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}" "${TemphbasePath}${temp_startRecoid}"
  echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
  echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
  echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"
  # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported equivalent
  hadoop fs -chmod -R 777 "${hbaseTablePath}"
  # Bulk-load the generated HFiles into HBase (this step creates the table)
  hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}"
  # Clean up: HFiles, temporary table, staging directory
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  echo "-------------循环 $COUNTDAY 次完成---------------------------"
done
# printf: bash's echo would have printed a literal "\n" here
printf 'load Data all over\n' >> log.out
# ===========================================================================
# Second export pass: same pipeline, configured with the full column list.
# ===========================================================================
# Database connection (kept for reference)
#srcConnect="jdbc:sqlserver://10.2.5.231\MSSQLSERVER2014;username=dmData;password=data!23;database=GMVMonitorCenter_Test;characterEncoding=utf8"
# Destination table name
dstTabName=wp04_0013_data
checkColumn="RecoID"
condiColumn="a.[记录时间]"
# Date window to export; startRecoid is advanced one day per iteration
startRecoid="2015-10-27 00:00:00"
plus=1
endRecodid="2017-01-11 00:00:00"
echo "from ${startRecoid} to ${endRecodid}"
COUNTDAY=0
# HBase table name prefix (a yyyymmdd suffix is appended per day)
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/multisplit_biz1
# HDFS directory backing the temporary external Hive table
TemphbasePath=/user/hbase/multisplit_biz1/${hbaseTableName}Temp
# Hive database name
hiveDbName=gree_monitorcenter_biz1
# Temporary Hive table name
tempTabName=${dstTabName}Temp
# Source Hive table name
hiveTableName=wp04_0013_data
hiveTableCols="RowKey string,c1 string"
# Columns folded into the single HBase value column c1.
# TODO(review): "..........." is a redaction/paste placeholder, not a real
# column — it yields invalid SQL. Restore the full column list before running.
columnsArry=(ProjID
DevID
...........
PC_M12)
# NOTE: the original fused 'sql=...' directly onto the array's closing
# parenthesis ("PC_M12)sql=..."), which is not valid shell; the assignment
# now starts on its own line.
sql=" select RowKey,concat(RecoTime,',',"
for column in "${columnsArry[@]}"; do
  if [[ "${column}" = "ReceiveTime" ]]; then
    # ReceiveTime is intentionally blanked out in the export
    sql=${sql}"'',',',"
  else
    # NULL-safe: emit '' instead of the literal NULL
    sql=${sql}"(case when ${column} is null then '' else ${column} end),',',"
  fi
done
sql=${sql}"(case when GroupCtrlDataSign is null then '' else GroupCtrlDataSign end)) as c1 from ${hiveTableName} "
# Batch import (second pass): export one day per iteration until the end date
while [[ "$startRecoid" != "$endRecodid" ]]; do
  # Next day boundary, same "Y-m-d H:M:S" format
  inter=$(date -d "1 day $startRecoid" +%Y-%m-%d\ %H:%M:%S)
  # Compare on the date part only, as epoch seconds
  temp_endRecodid=$(echo "$endRecodid" | awk '{print $1}')
  end_time_s=$(date -d "$temp_endRecodid" +%s)
  temp_inter=$(echo "$inter" | awk '{print $1}')
  inter_time_s=$(date -d "$temp_inter" +%s)
  # Clamp the final step so the loop terminates exactly on endRecodid
  if [ "$inter_time_s" -gt "$end_time_s" ]; then
    inter=$endRecodid
  fi
  # Compact date (yyyymmdd) used as the per-day table/path suffix
  temp_startRecoid=$(echo "$startRecoid" | awk '{print $1}' | tr -d "-")
  # Remove stale HFile output from any previous run of this day
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  # Drop the temporary table if it already exists
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
  hadoop fs -mkdir "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  # Recreate the temporary external table over today's staging directory
  hive -e "use $hiveDbName;\
create external table $tempTabName ($hiveTableCols) row format delimited fields terminated by '\t' location \"${TemphbasePath}${temp_startRecoid}\";"
  echo "++++++++++++++++++++开始导入hbases数据:++++++++++++++++++++++++++++++++"
  echo " insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ; "
  # Stage one day of data into the temp table, sorted by RowKey
  hive -e "\
set mapreduce.job.queuename=production; \
use $hiveDbName;\
insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ;"
  # Advance the cursor now; temp_startRecoid still names the day just staged
  startRecoid=$inter
  COUNTDAY=$((COUNTDAY + 1))
  # Skip empty days: nothing staged means nothing to bulk-load
  partf1=$(hadoop fs -du -s "${TemphbasePath}${temp_startRecoid}" | awk '{print $1 }')
  if [ "$partf1" == "0" ]; then
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
    echo "---------------file is null partf1 :$partf1-----------------------"
    continue
  fi
  # Build HFiles from the staged data for table biz1:<prefix><yyyymmdd>.
  # NOTE(review): the separator argument reads as a literal space here, while
  # the staging table is '\t'-delimited — confirm the character is really a tab.
  hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output="${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}" "${TemphbasePath}${temp_startRecoid}"
  echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
  echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
  echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"
  # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported equivalent
  hadoop fs -chmod -R 777 "${hbaseTablePath}"
  # Bulk-load the generated HFiles into HBase (this step creates the table)
  hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}"
  # Clean up: HFiles, temporary table, staging directory
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  echo "-------------循环 $COUNTDAY 次完成---------------------------"
done
# printf: bash's echo would have printed a literal "\n" here
printf 'load Data all over\n' >> log.out