创建分区表
-- External, partitioned Hive table holding app start-up log records.
-- Each row is one JSON document per text line, decoded by the OpenX JSON SerDe,
-- so the column names below must keep matching the JSON keys.
-- Partitions are year / month / day (y, m, d), all stored as strings.
-- The scheduled loader refers to this table as applogs_db.ext_startup_logs,
-- so run this statement with applogs_db as the current database.
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE EXTERNAL TABLE IF NOT EXISTS ext_startup_logs (
    userId         STRING,
    appPlatform    STRING,
    appId          STRING,
    startTimeInMs  BIGINT,
    activeTimeInMs BIGINT,
    appVersion     STRING,
    city           STRING
)
PARTITIONED BY (y STRING, m STRING, d STRING)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
定时脚本
# Scheduled loader: moves the log directory written three minutes ago into the
# matching y/m/d partition of applogs_db.ext_startup_logs.
# The three-minute lag avoids loading a file that is still being written
# (files roll over every minute).

# Take a single timestamp so that all parts (year, month, day, HHMM) agree.
# Abort if date fails: empty parts would turn the load path into '////'.
systime=$(date -d "-3 minute" +%Y-%m-%d-%H%M) || exit 1

# Split "YYYY-MM-DD-HHMM" on '-' in one step instead of four awk pipelines.
# A here-document is used (not <<<) so this also works under plain sh.
IFS=- read -r y m d hm <<EOF
${systime}
EOF

# LOAD DATA INPATH moves the source files, so the source path is gone afterwards.
/opt/module/hive-1.2.1/bin/hive -e "load data inpath '/${y}/${m}/${d}/${hm}' into table applogs_db.ext_startup_logs partition(y='${y}',m='${m}',d='${d}')"
注意:
1、为了避免把还在写入的文件导入到hive表里面,脚本只获取3分钟前的文件(文件按分钟滚动)
2、load data inpath 会把HDFS上的原文件移动到hive表对应的分区目录下,因此导入到hive表中后,原路径下的文件就没有了