准备表
CREATE EXTERNAL TABLE ods_app_log (
sdk_ver string
,time_zone string
,commit_id string
,commit_time string
,pid string
,app_token string
,app_id string
,device_id string
,device_id_type string
,release_channel string
,app_ver_name string
,app_ver_code string
,os_name string
,os_ver string
,LANGUAGE string
,country string
,manufacture string
,device_model string
,resolution string
,net_type string
,account string
,app_device_id string
,mac string
,android_id string
,imei string
,cid_sn string
,build_num string
,mobile_data_type string
,promotion_channel string
,carrier string
,city string
,user_id string
) partitioned BY (
day string
,os string
) row format delimited fields terminated BY '\001' location '/app-log-data/clean';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'android') location '/app-log-data/clean/2017-09-21/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'ios') location '/app-log-data/clean/2017-09-21/ios';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'android') location '/app-log-data/clean/2017-09-22/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'ios') location '/app-log-data/clean/2017-09-22/ios';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'android') location '/app-log-data/clean/2017-09-23/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'ios') location '/app-log-data/clean/2017-09-23/ios';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-24',os = 'android') location '/app-log-data/clean/2017-09-24/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-25',os = 'ios') location '/app-log-data/clean/2017-09-25/ios';
--新增用户表
CREATE EXTERNAL TABLE etl_user_new_day (
sdk_ver string
,time_zone string
,commit_id string
,commit_time string
,pid string
,app_token string
,app_id string
,device_id string
,device_id_type string
,release_channel string
,app_ver_name string
,app_ver_code string
,os_name string
,os_ver string
,LANGUAGE string
,country string
,manufacture string
,device_model string
,resolution string
,net_type string
,account string
,app_device_id string
,mac string
,android_id string
,imei string
,cid_sn string
,build_num string
,mobile_data_type string
,promotion_channel string
,carrier string
,city string
,user_id string
) partitioned BY (
day string
) row format delimited fields terminated BY '\001';
-- 1 历史用户表
create table etl_user_history(user_id string);
insert into etl_user_history(user_id)
select distinct user_id from ods_app_log;
-- 2 当日新增用户表:存所有字段(每个人时间最早的一条),带有一个分区字段:day string;
create table etl_user_new_day like etl_user_active_day;
中间处理
当日活跃-历史用户表 --> 新增用户表的当日分区
insert into etl_user_new_day partition(day='2017-09-21')
SELECT sdk_ver
,time_zone
,commit_id
,commit_time
,pid
,app_token
,app_id
,device_id
,device_id_type
,release_channel
,app_ver_name
,app_ver_code
,os_name
,os_ver
,LANGUAGE
,country
,manufacture
,device_model
,resolution
,net_type
,account
,app_device_id
,mac
,android_id
,imei
,cid_sn
,build_num
,mobile_data_type
,promotion_channel
,carrier
,city
,a.user_id
from ods_app_log a left join etl_user_history b on a.user_id = b.user_id
where a.day='2017-09-21' and b.user_id is null;
-- 2 将当日新增用户的user_id追加到历史表
insert into table etl_user_history
select distinct user_id from etl_user_new_day where day='2017-09-21';
-- 1 日新维度统计报表--数据建模
create table dim_user_new_day(os_name string,city string,release_channel string,app_ver_name string,cnts int)
partitioned by (day string, dim string);
计算留存
--次日留存用户统计
--昨天的数据跟今天的数据关联,yestoday=today-1天
select yestoday.day
,count(distinct yestoday.user_id) cnt1
,count(distinct today.user_id) cnt2
from
(select * from ods_app_log where day>='2017-09-21' and day<='2017-09-24') as yestoday
left join
(select * from ods_app_log where day>='2017-09-21' and day<='2017-09-24') as today
on yestoday.day =date_add(today.day,-1) and yestoday.user_id=today.user_id
group by yestoday.day;
select yestoday.day
,count(distinct yestoday.user_id) as cnt1
,count(distinct today.user_id) as cnt2
from
(select user_id,day from ods_app_log where day='2017-09-21') as yestoday
left join
(select user_id,day from ods_app_log where day='2017-09-22') as today
on yestoday.user_id=today.user_id
group by yestoday.day;