从hive往hbase导数据

#!/bin/bash
# Export data from Hive into HBase, one day per loop iteration (see below).
# NOTE(review): the script uses bash-only features ([[ ]], arrays), so the
# shebang must be bash rather than sh.

# Source database connection (kept commented out).
# NOTE(review): credentials are hard-coded here — move them to a secrets
# file or environment variable before reusing this line.
#srcConnect="jdbc:sqlserver://10.2.5.231\MSSQLSERVER2014;username=dmData;password=data!23;database=GMVMonitorCenter_Test;characterEncoding=utf8"

# Destination table name and check/condition columns
dstTabName=wp04_0013_data
checkColumn="RecoID"
condiColumn="a.[记录时间]"

# Start (inclusive) and end (exclusive) of the date range to export.
# Quoted so the embedded space needs no backslash escaping.
startRecoid="2015-10-27 00:00:00"
plus=1
endRecodid="2017-01-11 00:00:00"
echo "from ${startRecoid} to ${endRecodid}"
COUNTDAY=0

# HBase table-name prefix; a YYYYMMDD suffix is appended per iteration.
hbaseTableName=wp04_0013_data_
# HDFS directory where the generated HFiles are staged
hbaseTablePath=/user/hbase/multisplit_biz1
# HDFS directory backing the temporary external Hive table
TemphbasePath=/user/hbase/multisplit_biz1/${hbaseTableName}Temp


# Hive database name
hiveDbName=gree_monitorcenter_biz1
# Temporary (per-day staging) Hive table name
tempTabName=${dstTabName}Temp
# Source Hive table name
hiveTableName=wp04_0013_data
hiveTableCols="RowKey string,c1 string"

#要执行sql
# Columns to fold into the single HBase value column c1.
# NOTE(review): the original post elided most of the column list (the
# "..........." entry below is literal placeholder text, not a column) —
# restore the real column names before running this script.
columnsArry=(ProjID

DevID

...........

PC_M12)

# Build: select RowKey, concat(RecoTime, ',', col1, ',', ..., lastCol) as c1
# Each NULL column is mapped to '' so the concatenated string keeps its
# positional layout; ReceiveTime is deliberately emitted as an empty field.
sql=" select RowKey,concat(RecoTime,',',"
for column in "${columnsArry[@]}"; do
  if [[ "${column}" == "ReceiveTime" ]]; then
    sql+="'',',',"
  else
    sql+="(case when ${column} is null then '' else ${column} end),',',"
  fi
done
sql+="(case when GroupCtrlDataSign is null then '' else GroupCtrlDataSign end)) as c1 from ${hiveTableName} "


#分批导入
# Main batch loop: export one day of Hive data per iteration, stage it as
# tab-delimited files, convert to HFiles, and bulk-load them into a per-day
# HBase table (biz1:<prefix><YYYYMMDD>).
# NOTE(review): termination relies on `inter` being clamped to exactly
# $endRecodid below so the two strings compare equal byte-for-byte.
while [[ $startRecoid != $endRecodid ]]; do
       # Cursor advanced by one day, keeping the "YYYY-MM-DD HH:MM:SS" shape.
       inter=`date -d "1 day $startRecoid" +%Y-%m-%d\ %H:%M:%S`
   # Date-only portions used for the epoch-seconds overshoot check below.
   temp_endRecodid=`echo $endRecodid | awk '{print $1}'`
           end_time_s=`date -d $temp_endRecodid +%s`
   temp_inter=`echo $inter | awk '{print $1}'`
# Digits-only next-day date (YYYYMMDD).
# NOTE(review): temphbase_inter appears unused — later code derives
# temp_startRecoid instead; candidate for removal.
temphbase_inter=`echo $inter | awk '{print $1}' | tr -d "-"`
inter_time_s=`date -d $temp_inter +%s`
# Clamp the cursor so the final iteration lands exactly on endRecodid.
if [ $inter_time_s -gt $end_time_s ]
then
inter=$endRecodid
fi

# YYYYMMDD suffix for the day being exported. Computed BEFORE the cursor is
# advanced (at startRecoid=$inter below), so every path/table in the rest
# of the iteration refers to the current day.
temp_startRecoid=`echo $startRecoid | awk '{print $1}' | tr -d "-"`


# Remove any leftover HFile output for this day from a previous run.
hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}


# Drop the temporary Hive table if it exists.
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"

# Reset the per-day staging directory (errors intentionally silenced: the
# path may not exist yet / may already exist).
hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid}/* 1>/dev/null 2>&1
hadoop fs -mkdir ${TemphbasePath}${temp_startRecoid} 1>/dev/null 2>&1

# Recreate the temporary external table over the per-day staging directory.
hive -e "use $hiveDbName;\
    create external table $tempTabName ($hiveTableCols) row format delimited fields terminated by '\t' location \"${TemphbasePath}${temp_startRecoid}\";"

echo "++++++++++++++++++++开始导入hbases数据:++++++++++++++++++++++++++++++++"
echo " insert into table $tempTabName ${sql} where year=year('$startRecoid')  and month=month('$startRecoid')  and day=day('$startRecoid') order by RowKey asc ; "
# Export the current day's rows into the staging table; partition pruning
# is done via year()/month()/day() on the start-of-day timestamp.
hive -e "\
set mapreduce.job.queuename=production; \
use $hiveDbName;\
insert into table $tempTabName ${sql} where year=year('$startRecoid')  and month=month('$startRecoid')  and day=day('$startRecoid') order by RowKey asc ;" 


# Advance the cursor; temp_startRecoid still names the day just exported.
startRecoid=$inter
COUNTDAY=`expr $COUNTDAY + 1`
# Total byte size of the staged export; skip the HBase load when empty.
partf1=`hadoop fs -du -s ${TemphbasePath}${temp_startRecoid} |awk '{print $1 }'`
if [ "$partf1" == "0"  ]
then
hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid}/* 1>/dev/null 2>&1
echo "---------------file is null partf1 :$partf1-----------------------"
continue
fi

# Convert the staged TSV files into HFiles for bulk loading.
# NOTE(review): '-Dimporttsv.separator= ' passes a SPACE as the separator,
# but the staging table above is written tab-delimited — presumably this
# was a literal tab mangled by the blog formatting; confirm before running.
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output=${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} biz1:${hbaseTableName}${temp_startRecoid} ${TemphbasePath}${temp_startRecoid}

echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"



        hadoop dfs -chmod -R 777  ${hbaseTablePath}
        # Bulk-load the HFiles into the per-day HBase table (this step can
        # create the table).
        hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} biz1:${hbaseTableName}${temp_startRecoid}


       # Clean up: remove the day's HFiles, drop the staging table, and
       # delete the staging directory.
       hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}
        hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
    hadoop fs -rm -r ${TemphbasePath}${temp_startRecoid} 1>/dev/null 2>&1


echo "-------------循环 $COUNTDAY 次完成---------------------------"

done


echo "load Data all over\n" >> log.out
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值