#!/bin/bash
# database connection (JDBC)
srcConnect="jdbc:sqlserver://10.2.44.181:1433;username=mc_testor;password=0000002am;database=Gree_MonitorCenter_New"
# destination table name
dstTabName=wp04_0013_data
checkColumn="RowKey"        # split column for the Sqoop import
condiColumn="[记录时间]"     # record-time column used in the date filter
startRecoid=2017-01-06      # start date (inclusive)
plus=1                      # day step (unused; the loop advances 1 day directly)
endRecodid=2017-01-11       # end date (loop stops when the dates match)
echo "from ${startRecoid} to ${endRecodid}" > log.out
# HBase table name prefix (a YYYYMMDD date suffix is appended per batch)
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/biz1/wp04_0013_data
# temporary HDFS staging directory for the Sqoop export
TemphbasePath=/user/hive/biz1/${hbaseTableName}temp
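# With the values above, the per-day staging dir resolves to e.g.
# /user/hive/biz1/wp04_0013_data_temp20170106 and the HFile output dir to
# /user/hbase/biz1/wp04_0013_data/wp04_0013_data_20170106 (dates illustrative).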
# SQL to execute: builds the HBase row key plus a comma-joined value column
sql="select reverse(right('0000000000'+ltrim(a.[ProjDev_ID]),10))+
replace(CONVERT(varchar(12),a.[记录时间],108),':','')+'7F15'+right('000000'+ltrim([Reco_ID]),6) as RowKey
,CONVERT(varchar(19),a.[记录时间],121)
+','+ cast (b.Proj_ID as varchar )
+','+ cast (a.[ProjDev_ID] as varchar )
+','+ '224'
+','+ ''
+','+ ''
+','+ ''
+','+ '7F15'
+','+ cast (a.[Reco_ID] as varchar )
+','+'' as c1
from [dbo].[0013_7F15_IDUStateInfo_RSimulateData] a left join Proj_Dev b on a.ProjDev_ID=b.ProjDev_ID "
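# Row-key sketch (hypothetical values): for ProjDev_ID=123, a record time of
# 10:23:45 and Reco_ID=42, the expression above evaluates to
#   reverse('0000000123') + '102345' + '7F15' + '000042'
#   = "32100000001023457F15000042"
# i.e. reversed zero-padded device id + HHmmss (style 108, colons stripped)
# + fixed type code + zero-padded record id.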
# import in daily batches
while [[ $startRecoid != $endRecodid ]]; do
inter=$(date -d "1 day $startRecoid" +%Y-%m-%d)               # next day
temp_endRecodid=$(echo $endRecodid | awk '{print $1}')
end_time_s=$(date -d $temp_endRecodid +%s)                    # end date as epoch seconds
temp_inter=$(echo $inter | awk '{print $1}')
temphbase_inter=$(echo $inter | awk '{print $1}' | tr -d "-") # next day as YYYYMMDD (unused below)
inter_time_s=$(date -d $temp_inter +%s)                       # next day as epoch seconds
if [ $inter_time_s -gt $end_time_s ]   # clamp the step to the end date
then
inter=$endRecodid
fi
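# Example (dates assumed): with startRecoid=2017-01-06,
#   date -d "1 day 2017-01-06" +%Y-%m-%d   ->  2017-01-07
# and the epoch-second comparison above clamps inter to endRecodid once the
# next step would overshoot the end date.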
temp_startRecoid=$(echo $startRecoid | awk '{print $1}' | tr -d "-")   # current day as YYYYMMDD
starttime=${startRecoid}" 00:00:00:000"   # day window start (by ljn)
endtime=${startRecoid}" 23:59:59:997"     # day window end; .997 is SQL Server datetime's millisecond ceiling (by ljn)
hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}   # clear leftover HFile output from a previous run
#hadoop fs -mkdir ${hbaseTablePath} 1>/dev/null 2>&1
hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid} 1>/dev/null 2>&1     # clear leftover staging dir
echo "开始导入hbases数据:++++++++++++++++++++++++++++++++"
sqoop import -D mapreduce.job.queuename=production --connect "${srcConnect}" \
--query "select t.* from (${sql} where ${condiColumn}>='${starttime}' and ${condiColumn}<='${endtime}' ) t WHERE \$CONDITIONS" \
--split-by ${checkColumn} --fields-terminated-by "\t" --lines-terminated-by "\n" \
--target-dir ${TemphbasePath}${temp_startRecoid} \
-m 15
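# Sqoop writes the export as part-m-NNNNN files under the target dir; an
# optional sanity check is to list them:
#   hadoop fs -ls ${TemphbasePath}${temp_startRecoid}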
hadoop fs -test -e ${TemphbasePath}${temp_startRecoid}
if [ $? -ne 0 ]
then
echo "+++++++++++++++++++++++文件不存在+++++++++++++++++++++++++"
echo "+++++++++++++++++++++现在截断时间是: $startRecoid +++++++++++++++"
break
fi
startRecoid=$inter
partf1=$(hadoop fs -du -s ${TemphbasePath}${temp_startRecoid} | awk '{print $1}')   # byte size of the staged export
if [ "$partf1" == "0" ]
then
hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid} 1>/dev/null 2>&1
echo "---------------file is null partf1 :$partf1-----------------------"
continue
fi
# fields are tab-separated (ImportTsv's default), matching Sqoop's --fields-terminated-by "\t"
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
    -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' \
    -Dimporttsv.bulk.output=${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} \
    biz1:${hbaseTableName}${temp_startRecoid} ${TemphbasePath}${temp_startRecoid}
echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"
#ps:
# f1 is the column family name; the row key column must be specified
# -Dimporttsv.bulk.output specifies the HFile output directory
# smy_test: the target table (this step does not create it)
# /user/hdfs/sqlserver: the HDFS directory holding the input data
hadoop fs -chmod -R 777 ${hbaseTablePath}
# load the HFiles into the HBase table (this step can create the table)
hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} biz1:${hbaseTableName}${temp_startRecoid}
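# Optional sanity check (date suffix is illustrative): count the rows just
# bulk-loaded into that day's table via the hbase shell:
#   echo "count 'biz1:${hbaseTableName}20170106'" | hbase shell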
hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}
hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid} 1>/dev/null 2>&1
echo "-------------循环完成---------------------------"
done
echo "load Data all over\n" >> log.out