#!/bin/bash
# Daily bulk export: Hive table -> HFiles -> HBase, one day per loop iteration.
# NOTE: bash (not plain sh) is required — this script uses arrays and [[ ]].

# Database connection (kept for reference)
#srcConnect="jdbc:sqlserver://10.2.5.231\MSSQLSERVER2014;username=dmData;password=data!23;database=GMVMonitorCenter_Test;characterEncoding=utf8"
# Destination table name
dstTabName=wp04_0013_data
checkColumn="RecoID"
condiColumn="a.[记录时间]"
# Date window to export; startRecoid is advanced one day per iteration
startRecoid="2015-10-27 00:00:00"
plus=1
endRecodid="2017-01-11 00:00:00"
echo "from ${startRecoid} to ${endRecodid}"
COUNTDAY=0
# HBase table name prefix (a yyyymmdd suffix is appended per day)
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/multisplit_biz1
# HDFS directory backing the temporary external Hive table
TemphbasePath=/user/hbase/multisplit_biz1/${hbaseTableName}Temp
# Hive database name
hiveDbName=gree_monitorcenter_biz1
# Temporary Hive table name
tempTabName=${dstTabName}Temp
# Source Hive table name
hiveTableName=wp04_0013_data
hiveTableCols="RowKey string,c1 string"
# Columns folded into the single HBase value column c1.
# TODO(review): the original read "columnsArry=(ProjID" with no closing
# parenthesis — a shell syntax error (the column list was truncated in a
# paste). Restore the full column list before running in production.
columnsArry=(ProjID)
# Build the SELECT that concatenates all columns into one comma-separated value
sql=" select RowKey,concat(RecoTime,',',"
for column in "${columnsArry[@]}"; do
  if [[ "${column}" = "ReceiveTime" ]]; then
    # ReceiveTime is intentionally blanked out in the export
    sql=${sql}"'',',',"
  else
    # NULL-safe: emit '' instead of the literal NULL
    sql=${sql}"(case when ${column} is null then '' else ${column} end),',',"
  fi
done
sql=${sql}"(case when GroupCtrlDataSign is null then '' else GroupCtrlDataSign end)) as c1 from ${hiveTableName} "
# Batch import: export one day per iteration until the end date is reached
while [[ "$startRecoid" != "$endRecodid" ]]; do
  # Next day boundary, same "Y-m-d H:M:S" format
  inter=$(date -d "1 day $startRecoid" +%Y-%m-%d\ %H:%M:%S)
  # Compare on the date part only, as epoch seconds
  temp_endRecodid=$(echo "$endRecodid" | awk '{print $1}')
  end_time_s=$(date -d "$temp_endRecodid" +%s)
  temp_inter=$(echo "$inter" | awk '{print $1}')
  inter_time_s=$(date -d "$temp_inter" +%s)
  # Clamp the final step so the loop terminates exactly on endRecodid
  if [ "$inter_time_s" -gt "$end_time_s" ]; then
    inter=$endRecodid
  fi
  # Compact date (yyyymmdd) used as the per-day table/path suffix
  temp_startRecoid=$(echo "$startRecoid" | awk '{print $1}' | tr -d "-")
  # Remove stale HFile output from any previous run of this day
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  # Drop the temporary table if it already exists
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
  hadoop fs -mkdir "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  # Recreate the temporary external table over today's staging directory
  hive -e "use $hiveDbName;\
create external table $tempTabName ($hiveTableCols) row format delimited fields terminated by '\t' location \"${TemphbasePath}${temp_startRecoid}\";"
  echo "++++++++++++++++++++开始导入hbases数据:++++++++++++++++++++++++++++++++"
  echo " insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ; "
  # Stage one day of data into the temp table, sorted by RowKey
  hive -e "\
set mapreduce.job.queuename=production; \
use $hiveDbName;\
insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ;"
  # Advance the cursor now; temp_startRecoid still names the day just staged
  startRecoid=$inter
  COUNTDAY=$((COUNTDAY + 1))
  # Skip empty days: nothing staged means nothing to bulk-load
  partf1=$(hadoop fs -du -s "${TemphbasePath}${temp_startRecoid}" | awk '{print $1 }')
  if [ "$partf1" == "0" ]; then
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
    echo "---------------file is null partf1 :$partf1-----------------------"
    continue
  fi
  # Build HFiles from the staged data for table biz1:<prefix><yyyymmdd>.
  # NOTE(review): the separator argument reads as a literal space here, while
  # the staging table is '\t'-delimited — confirm the character is really a tab.
  hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output="${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}" "${TemphbasePath}${temp_startRecoid}"
  echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
  echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
  echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"
  # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported equivalent
  hadoop fs -chmod -R 777 "${hbaseTablePath}"
  # Bulk-load the generated HFiles into HBase (this step creates the table)
  hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}"
  # Clean up: HFiles, temporary table, staging directory
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  echo "-------------循环 $COUNTDAY 次完成---------------------------"
done
# printf: bash's echo would have printed a literal "\n" here
printf 'load Data all over\n' >> log.out
# ===========================================================================
# Second export pass: same pipeline, configured with the full column list.
# ===========================================================================
# Database connection (kept for reference)
#srcConnect="jdbc:sqlserver://10.2.5.231\MSSQLSERVER2014;username=dmData;password=data!23;database=GMVMonitorCenter_Test;characterEncoding=utf8"
# Destination table name
dstTabName=wp04_0013_data
checkColumn="RecoID"
condiColumn="a.[记录时间]"
# Date window to export; startRecoid is advanced one day per iteration
startRecoid="2015-10-27 00:00:00"
plus=1
endRecodid="2017-01-11 00:00:00"
echo "from ${startRecoid} to ${endRecodid}"
COUNTDAY=0
# HBase table name prefix (a yyyymmdd suffix is appended per day)
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/multisplit_biz1
# HDFS directory backing the temporary external Hive table
TemphbasePath=/user/hbase/multisplit_biz1/${hbaseTableName}Temp
# Hive database name
hiveDbName=gree_monitorcenter_biz1
# Temporary Hive table name
tempTabName=${dstTabName}Temp
# Source Hive table name
hiveTableName=wp04_0013_data
hiveTableCols="RowKey string,c1 string"
# Columns folded into the single HBase value column c1.
# TODO(review): "..........." is a redaction/paste placeholder, not a real
# column — it yields invalid SQL. Restore the full column list before running.
columnsArry=(ProjID
DevID
...........
PC_M12)
# NOTE: the original fused 'sql=...' directly onto the array's closing
# parenthesis ("PC_M12)sql=..."), which is not valid shell; the assignment
# now starts on its own line.
sql=" select RowKey,concat(RecoTime,',',"
for column in "${columnsArry[@]}"; do
  if [[ "${column}" = "ReceiveTime" ]]; then
    # ReceiveTime is intentionally blanked out in the export
    sql=${sql}"'',',',"
  else
    # NULL-safe: emit '' instead of the literal NULL
    sql=${sql}"(case when ${column} is null then '' else ${column} end),',',"
  fi
done
sql=${sql}"(case when GroupCtrlDataSign is null then '' else GroupCtrlDataSign end)) as c1 from ${hiveTableName} "
# Batch import (second pass): export one day per iteration until the end date
while [[ "$startRecoid" != "$endRecodid" ]]; do
  # Next day boundary, same "Y-m-d H:M:S" format
  inter=$(date -d "1 day $startRecoid" +%Y-%m-%d\ %H:%M:%S)
  # Compare on the date part only, as epoch seconds
  temp_endRecodid=$(echo "$endRecodid" | awk '{print $1}')
  end_time_s=$(date -d "$temp_endRecodid" +%s)
  temp_inter=$(echo "$inter" | awk '{print $1}')
  inter_time_s=$(date -d "$temp_inter" +%s)
  # Clamp the final step so the loop terminates exactly on endRecodid
  if [ "$inter_time_s" -gt "$end_time_s" ]; then
    inter=$endRecodid
  fi
  # Compact date (yyyymmdd) used as the per-day table/path suffix
  temp_startRecoid=$(echo "$startRecoid" | awk '{print $1}' | tr -d "-")
  # Remove stale HFile output from any previous run of this day
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  # Drop the temporary table if it already exists
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
  hadoop fs -mkdir "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  # Recreate the temporary external table over today's staging directory
  hive -e "use $hiveDbName;\
create external table $tempTabName ($hiveTableCols) row format delimited fields terminated by '\t' location \"${TemphbasePath}${temp_startRecoid}\";"
  echo "++++++++++++++++++++开始导入hbases数据:++++++++++++++++++++++++++++++++"
  echo " insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ; "
  # Stage one day of data into the temp table, sorted by RowKey
  hive -e "\
set mapreduce.job.queuename=production; \
use $hiveDbName;\
insert into table $tempTabName ${sql} where year=year('$startRecoid') and month=month('$startRecoid') and day=day('$startRecoid') order by RowKey asc ;"
  # Advance the cursor now; temp_startRecoid still names the day just staged
  startRecoid=$inter
  COUNTDAY=$((COUNTDAY + 1))
  # Skip empty days: nothing staged means nothing to bulk-load
  partf1=$(hadoop fs -du -s "${TemphbasePath}${temp_startRecoid}" | awk '{print $1 }')
  if [ "$partf1" == "0" ]; then
    hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}"/* 1>/dev/null 2>&1
    echo "---------------file is null partf1 :$partf1-----------------------"
    continue
  fi
  # Build HFiles from the staged data for table biz1:<prefix><yyyymmdd>.
  # NOTE(review): the separator argument reads as a literal space here, while
  # the staging table is '\t'-delimited — confirm the character is really a tab.
  hbase org.apache.hadoop.hbase.mapreduce.ImportTsv '-Dimporttsv.separator= ' -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' -Dimporttsv.bulk.output="${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}" "${TemphbasePath}${temp_startRecoid}"
  echo "此时的查询时间值为:+++++++++++++++++++++++++++++${temp_startRecoid}"
  echo "此时的hbase存放目录值为:+++++++++++++++++++++++++++++${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  echo "此时的表名为:+++++++++++++++++++++++++++++${hbaseTableName}${temp_startRecoid}"
  echo "表临时文件目录为:+++++++++++++++++++++++++++++${TemphbasePath}${temp_startRecoid}"
  # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported equivalent
  hadoop fs -chmod -R 777 "${hbaseTablePath}"
  # Bulk-load the generated HFiles into HBase (this step creates the table)
  hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}" "biz1:${hbaseTableName}${temp_startRecoid}"
  # Clean up: HFiles, temporary table, staging directory
  hadoop fs -rm -r "${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
  hive -e "use $hiveDbName; \
drop table if exists $tempTabName;"
  hadoop fs -rm -r "${TemphbasePath}${temp_startRecoid}" 1>/dev/null 2>&1
  echo "-------------循环 $COUNTDAY 次完成---------------------------"
done
# printf: bash's echo would have printed a literal "\n" here
printf 'load Data all over\n' >> log.out