建库
.create database db_app;
建表
1、元数据表,用来加载清洗好的数据
create table ods_app_log(
cid_sn string,
mobile_data_type string,
os_ver string,
mac string,
resolution string,
commit_time string,
sdk_ver string,
device_id_type string,
city string,
android_id string,
device_model string,
carrier string,
promotion_channel string,
app_ver_name string,
imei string,
app_ver_code string,
pid string,
net_type string,
device_id string,
app_device_id string,
release_channel string,
country string,
time_zone string,
os_name string,
manufacture string,
commit_id string,
app_token string,
account string,
app_id string,
build_num string,
language string,
uid string
)
2、活跃用户表
create table etl_user_active_day(
uid string,
commit_time string,
city string,
release_channel string,
app_ver_name string
)partitioned by (day string);
3、活跃用户维度统计表
create table dim_user_active(
city string,
release_channel string,
app_ver_name string,
active_user_cnt int
)partitioned by (day string,flag string);
4历史用户表
create table etl_history_user(uid string);
5、创建每日新增用户表
create table etl_user_new_day(
uid string,
commit_time string,
city string,
release_channel string,
app_ver_name string
)partitioned by (day string);
6、新增用户维度统计表
create table dim_user_new(
city string,
release_channel string,
app_ver_name string,
newuser_cnt int
)partitioned by (day string,flag string);
活跃用户数据分析
1、计算数据到活跃用户表
/** 计算数据到活跃用户表 **/
/** 20170101 **/
insert into table etl_user_active_day partition(day="20170101")
select uid,commit_time,city , release_channel,app_ver_name
from
(select
uid,commit_time, city , release_channel,app_ver_name,
row_number() over(partition by uid order by commit_time) as rn
from ods_app_log where day="20170101") tmp1
where tmp1.rn=1;
/**20170102**/
insert into table etl_user_active_day partition(day="20170102")
select uid,commit_time,city , release_channel,app_ver_name
from
(select
uid,commit_time, city , release_channel,app_ver_name,
row_number() over(partition by uid order by commit_time) as rn
from ods_app_log where day="20170102") tmp1
where tmp1.rn=1;
2、维度统计
分维度进行统计
时间 城市 渠道 版本
当天 0 0 0
当天 0 0 1
当天 0 1 0
当天 0 1 1
当天 1 0 0
当天 1 0 1
当天 1 1 0
当天 1 1 1
from etl_user_active_day
insert into table dim_user_active partition(day="20170101",flag="000")
select "all","all","all",count(1) where day="20170101"
insert into table dim_user_active partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"
group by app_ver_name
insert into table dim_user_active partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"
group by release_channel
insert into table dim_user_active partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"
group by release_channel,app_ver_name
insert into table dim_user_active partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"
group by city
insert into table dim_user_active partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"
group by city,app_ver_name
insert into table dim_user_active partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"
group by city,release_channel
insert into table dim_user_active partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101"
group by city,release_channel,app_ver_name;
新增用户数据分析
1、将活跃用户表和历史用户表join,求出历史用户表中uid为空的即为新增用户
insert into table etl_user_new_day partition(day="20170101")
select a.uid,a.commit_time,a.city,a.release_channel,a.app_ver_name
from etl_user_active_day as a
left join
etl_history_user as b
on a.uid=b.uid
where a.day="20170101" and b.uid is null;
2、将上一步生成的新增用户加入历史用户表中,称为历史用户,以便下一次统计
insert into table etl_history_user
select uid from etl_user_new_day where day="20170101";
3、新增用户维度统计
from etl_user_new_day
insert into table dim_user_new partition(day="20170101",flag="000")
select "all","all","all",count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"
group by app_ver_name
insert into table dim_user_new partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"
group by release_channel
insert into table dim_user_new partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"
group by release_channel,app_ver_name
insert into table dim_user_new partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"
group by city
insert into table dim_user_new partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"
group by city,app_ver_name
insert into table dim_user_new partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"
group by city,release_channel
insert into table dim_user_new partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101"
group by city,release_channel,app_ver_name;