需求:
1、测试数据结构
id bigint ,src_ipv4 bigint ,src_port int ,dst_ipv4 bigint ,dst_port int ,protocol int ,start_time string ,end_time string ,receive_time string
2、start_time 如2018-08-24 10:18:37,使用split(ebt.start_time,' ')[0] time,生成新字段:2018-08-24
3、目的为提高查询效率:按月建表、按天分区,查询时只需扫描相关分区,速度更快
实现:
1)使用crontab定时每天通过sqoop把mysql中数据导入到hive中的临时月表中eventbase_temp
2)创建数据月表eventbase_201808,新增分区字段time
3)把临时月表中的数据按天分区导入到月表中
4)删除临时月表
脚本:
#!/bin/bash
# Daily job: import yesterday's MySQL rows into a Hive temp table, then move
# them into a month-named, day-partitioned Hive table (see header notes).

# Target Hive database prefix (keep the trailing dot: used as "db.table").
HIVE_DB=shoudu.

# Month suffix for the monthly table, e.g. 201808.
# NOTE(review): relies on GNU date parsing "last-day"; confirm it yields the
# intended month on this host — "-1 day" may be what was meant.
datename=$(date -d last-day +"%Y%m")
echo "$datename"

# MySQL JDBC connection string.
# SECURITY NOTE(review): credentials are hard-coded below; prefer sqoop's
# --password-file or an options file so they do not leak via ps/history.
DB1IP='jdbc:mysql://192.168.2.111:3306/test?useUnicode=true&characterEncoding=utf-8&tinyInt1isBit=false'
echo "$DB1IP"
# MySQL username
DB1NAME=root
echo "$DB1NAME"
# MySQL password
DB1PASS=111111
echo "$DB1PASS"
# Source MySQL table name
tablename='event_base'
echo "$tablename"

# Import window boundaries: [yesterday 00:00:00, today 00:00:00)
today=$(date +"%Y-%m-%d" -d "-0 days")
yesterday=$(date +"%Y-%m-%d" -d "-1 days")
# DDL for the monthly table, partitioned by day (partition column "time"
# holds the yyyy-MM-dd part of start_time).
# FIX: "if not exists" makes the daily cron idempotent — only the first run
# of each month actually creates the table; later runs are no-ops instead of
# a failing CREATE.
creat_eventbase_partitioned="
create table if not exists ${HIVE_DB}eventbase_$datename (
id bigint ,src_ipv4 bigint ,src_port int ,dst_ipv4 bigint ,dst_port int ,protocol int ,start_time string ,end_time string ,receive_time string)
partitioned by (time string)
stored as PARQUET;
"

# Move temp-table rows into the monthly table, deriving the day partition
# from start_time ("2018-08-24 10:18:37" -> "2018-08-24").
# Dynamic partitioning in nonstrict mode lets Hive create the day partitions
# automatically; "distribute by time" groups rows per partition per reducer.
# NOTE(review): "insert into" appends — re-running the same day duplicates
# rows; consider "insert overwrite" if the job may ever be replayed.
insert_eventbase_partitioned="
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table
${HIVE_DB}eventbase_$datename
partition(time)
select
ebt.id, ebt.src_ipv4, ebt.src_port, ebt.dst_ipv4, ebt.dst_port, ebt.protocol, ebt.start_time, ebt.end_time,
ebt.receive_time,
split(ebt.start_time,' ')[0] time
from
${HIVE_DB}eventbase_temp ebt
distribute by time;
"
# sqoop: MySQL --> Hive temp table, restricted to yesterday's rows.
# BUG FIX: the original command was missing the line continuations after
# --hive-import and --hive-drop-import-delims, so those two flags and the
# --where filter were executed as separate (failing) shell commands and the
# import silently pulled the WHOLE table with delimiters intact.
sqoop import --connect "$DB1IP" \
--username "$DB1NAME" --password "$DB1PASS" \
--hive-table ${HIVE_DB}eventbase_temp \
--table "$tablename" \
--hive-import \
--hive-drop-import-delims \
--where "start_time < '${today} 00:00:00' and start_time >= '${yesterday} 00:00:00' "
isok=$?
echo $isok
if [ "$isok" -eq 0 ]
then
# import into the temp table succeeded
I_STATS=0
echo '-->mysql -> hive success'
else
# import failed
# BUG FIX: exit non-zero (was "exit 0") so cron/monitoring can see the failure
I_STATS=1
echo $I_STATS
echo '-->mysql -> hive failed'
exit 1
fi
# Create this month's table.
# NOTE(review): the exit status of this CREATE is deliberately not used to
# abort the job — without "if not exists" in the DDL it fails on every run
# after the first of the month, because the table already exists.
hive -e "$creat_eventbase_partitioned"
echo '-->start'
# Load the temp-table rows into the monthly table, one partition per day.
# (Whether the temp table should be dropped afterwards is handled below.)
hive -e "$insert_eventbase_partitioned"
isok=$?
echo $isok
if [ "$isok" -eq 0 ]
then
# partition insert succeeded
I_STATS=0
echo '-->insert_eventbase_partitioned success'
else
# partition insert failed
# BUG FIX: exit non-zero (was "exit 0") so the failure is visible to cron
I_STATS=1
echo $I_STATS
echo '-->insert_eventbase_partitioned failed'
exit 1
fi
# Drop the temp table now that its rows live in the monthly table.
hive -e "drop table ${HIVE_DB}eventbase_temp;"
isok=$?
echo $isok
if [ "$isok" -eq 0 ]
then
# drop succeeded
I_STATS=0
# FIX: message said "truncate" but the operation is a DROP TABLE
echo '-->drop temp table success'
else
# drop failed
# BUG FIX: exit non-zero (was "exit 0") so the leftover temp table is noticed
I_STATS=1
echo $I_STATS
echo '-->drop temp table failed'
exit 1
fi
# Refresh Impala's view of the Hive metastore so the new table/partitions
# become queryable from Impala.
impala-shell -q 'invalidate metadata'
echo "---->finish-->$datename"
echo $I_STATS