1. 使用 MapReduce 进行数据清洗
#!/bin/bash
# Daily cleaning job: runs the AppLogDataClean MapReduce job over yesterday's
# raw app logs.
#   input : /app-log-data/data/<YYYY-MM-DD>
#   output: /app-log-data/clean/<YYYY-MM-DD>-clean
# The script's exit status is that of the hadoop command (the last command run).

# Yesterday's date. The format string must be attached directly to '+': with a
# space in between, date treats the format as an extra operand and fails,
# leaving day_str empty and both paths below pointing at the wrong directories.
day_str=$(date -d '-1 day' +'%Y-%m-%d') || {
  echo "failed to compute yesterday's date" >&2
  exit 1
}
inpath="/app-log-data/data/${day_str}"
outpath="/app-log-data/clean/${day_str}-clean"

# Braces delimit the variable name from the text that immediately follows it.
echo "准备清洗${day_str}数据..."
/root/apps/hadoop-2.8.3/bin/hadoop jar /root/data-clean.jar cn.edu360.app.log.mr.AppLogDataClean "$inpath" "$outpath"
---------------------------------------------------------------------------------------------------------------------------
2.原始数据建模
2.1原始数据表
--建表语句
-- Raw app-log table. Declared EXTERNAL with its root at /app-log-data/clean,
-- partitioned by (day, os). Every column is a STRING and fields are separated
-- by the \001 character.
CREATE EXTERNAL TABLE ods_app_log (
    sdk_ver             STRING,
    time_zone           STRING,
    commit_id           STRING,
    commit_time         STRING,
    pid                 STRING,
    app_token           STRING,
    app_id              STRING,
    device_id           STRING,
    device_id_type      STRING,
    release_channel     STRING,
    app_ver_name        STRING,
    app_ver_code        STRING,
    os_name             STRING,
    os_ver              STRING,
    language            STRING,
    country             STRING,
    manufacture         STRING,
    device_model        STRING,
    resolution          STRING,
    net_type            STRING,
    account             STRING,
    app_device_id       STRING,
    mac                 STRING,
    android_id          STRING,
    imei                STRING,
    cid_sn              STRING,
    build_num           STRING,
    mobile_data_type    STRING,
    promotion_channel   STRING,
    carrier             STRING,
    city                STRING,
    user_id             STRING
)
PARTITIONED BY (
    day STRING,
    os  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
LOCATION '/app-log-data/clean';
--改变数据表分区的结构
-- Register the 2018-05-19 partitions (one per OS) at explicit locations.
-- IF NOT EXISTS makes these statements safe to re-run: without it, adding a
-- partition that is already registered fails.
ALTER TABLE ods_app_log ADD IF NOT EXISTS PARTITION (day = '2018-05-19', os = 'android') LOCATION '/app-log-data/clean/2018-05-19/android';
ALTER TABLE ods_app_log ADD IF NOT EXISTS PARTITION (day = '2018-05-19', os = 'ios') LOCATION '/app-log-data/clean/2018-05-19/ios';
--数据导入语句
-- Load the cleaned 2018-05-19 output, one directory per OS, into the
-- matching (day, os) partition of ods_app_log.
LOAD DATA INPATH '/app-log-data/clean/2018-05-19-clean/android'
    INTO TABLE ods_app_log
    PARTITION (day = '2018-05-19', os = 'android');
LOAD DATA INPATH '/app-log-data/clean/2018-05-19-clean/ios'
    INTO TABLE ods_app_log
    PARTITION (day = '2018-05-19', os = 'ios');
2.2日活数据表
--建表语句
CREATE TABLE etl_user_active_day (
sdk_ver string
,time_zone string
,commit_id string
,commit_time string
,pid string
,app_token string
,app_id string
,device_id string
,device_id_type string
,release_channel string
,app_ver_name string
,app_ver_code string
,os_name string
,os_ver string
,language string
,country string
,manufacture string
,device_model string
,resolution string
,net_type string
,ac
APP数据模拟处理流程—[总结篇]
最新推荐文章于 2023-05-12 23:35:21 发布