-- Create the ODS-layer raw log table ods_log
-- Drop and recreate the ODS-layer raw log table.
-- Each row stores one raw JSON log line; data is partitioned by ingestion date.
DROP TABLE IF EXISTS ods_log;
CREATE EXTERNAL TABLE ods_log (`line` string)
PARTITIONED BY (`dt` string)
STORED AS
    -- LZO input format: lets Hive read (and, once indexed, split) .lzo files
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-- EXTERNAL table: dropping it leaves the HDFS files in place
LOCATION '/warehouse/gmall/ods/ods_log'
;
-- Load one day of data into the dt partition
-- Move the day's raw logs from the HDFS staging directory into the partition.
-- NOTE: LOAD DATA INPATH *moves* the HDFS files, it does not copy them.
LOAD DATA INPATH '/origin_data/gmall/log/topic_log/2020-06-14'
    INTO TABLE ods_log PARTITION (dt = '2020-06-14');
-- Spot-check the loaded data
-- Spot-check: name the columns explicitly instead of SELECT *
-- (same output: `line` plus the partition column dt, which Hive
-- appends last under SELECT *). Explicit lists survive schema changes.
select `line`, dt from ods_log limit 10;
# Build a split index for the LZO-compressed file (run from the shell)
# Index the newly loaded .lzo file so MapReduce can split it across mappers
# (un-indexed LZO is processed by a single mapper). -Dmapreduce.job.queuename
# routes the indexing job to the "hive" YARN queue.
[scorpion@warehouse102 ~]$ hadoop jar /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer -Dmapreduce.job.queuename=hive /warehouse/gmall/ods/ods_log/dt=2020-06-14
# Create the ODS-layer user-behavior data loading script
# Open the script file for editing; the script body follows below.
[scorpion@warehouse102 bin]$ vim ods_hdfs_to_ods_log.sh
#!/bin/bash
# ods_hdfs_to_ods_log.sh [yyyy-mm-dd]
# Loads one day of raw logs from the HDFS staging area into ${APP}.ods_log,
# then builds the LZO split index for the new partition.
# With no argument, defaults to yesterday's date.

APP=gmall
hive=/opt/module/hive-3.1.2/bin/hive
hadoop=/opt/module/hadoop-3.1.3/bin/hadoop

# Use the date passed as $1 if present; otherwise yesterday (YYYY-MM-DD).
if [ -n "$1" ]; then
    do_date=$1
else
    # $(...) is preferred over legacy backticks for command substitution
    do_date=$(date -d "-1 day" +%F)
fi

echo "================== 日志日期为 $do_date =================="

# LOAD DATA INPATH moves the staged files into the warehouse partition.
sql="
load data inpath '/origin_data/$APP/log/topic_log/$do_date' into table ${APP}.ods_log partition(dt='$do_date');
"
$hive -e "$sql"

# Index the LZO file so downstream MapReduce jobs can split it across mappers.
$hadoop jar /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer -Dmapreduce.job.queuename=hive /warehouse/$APP/ods/ods_log/dt=$do_date
# Grant execute permission only; 777 makes the script world-writable (any
# user could edit a script later run by the warehouse account), and sudo is
# unnecessary for a file the current user owns.
[scorpion@warehouse102 bin]$ chmod +x ods_hdfs_to_ods_log.sh
# Test the script for a specific date
# Run the loader for 2020-06-15, then verify with:
#   select * from ods_log where dt='2020-06-15' limit 10;
[scorpion@warehouse102 bin]$ ods_hdfs_to_ods_log.sh 2020-06-15