茄子快传数据分析之活跃用户和新增用户数据分析

版权声明:闻道有先后,术业有专攻。 https://blog.csdn.net/wlk_328909605/article/details/82255758

第一步:数据定义—建模(引用数据仓库的概念)
1.创建贴源数据表

create database app_db;//建库
use app_db;
create external table ods_app_log(
 cid_sn              string,  
 mobile_data_type    string,  
 os_ver              string,  
 mac                 string,  
 resolution          string,  
 commit_time         string,  
 sdk_ver             string,  
 device_id_type      string,  
 city                string,  
 android_id          string,  
 device_model        string,  
 carrier             string,  
 promotion_channel   string,  
 app_ver_name        string,  
 imei                string,  
 app_ver_code        string,  
 pid                 string,  
 net_type            string,  
 device_id           string,  
 app_device_id       string,  
 release_channel     string,  
 country             string,  
 time_zone           string,  
 os_name             string,  
 manufacture         string,  
 commit_id           string,  
 app_token           string,  
 account             string,  
 app_id              string,  
 build_num           string,  
 language            string,
 user_id    string) 
 partitioned by (day string)
 row format delimited fields terminated by ',';

ps:这些字段都可以在源数据中得到。

2.建模

 /* ETL:活跃用户信息表 */
 -- user_id,时间,城市,推广渠道,下载渠道,版本,.
  create table etl_user_active_day(
 user_id string,
 city string,
 promotion_channel string,
 release_channel string,
 app_ver_name string)
 partitioned by (day string);




  /* ETL:新用户信息表 */
 -- user_id,时间,城市,推广渠道,下载渠道,版本,.
  create table etl_user_new_day(
 user_id string,
 city string,
 promotion_channel string,
 release_channel string,
 app_ver_name string)
 partitioned by (day string);


 /* DIM:维度统计报表——日活数维度统计报表 */
 create table dim_user_active_d(
 city string,
 promotion_channel string,
 release_channel string,
 app_ver_name string,
 cnt int 
 )
 partitioned by (day string);


 /* DIM:维度统计报表——日新数维度统计报表 */ 
 create table dim_user_new_d(
 city string,
 promotion_channel string,
 release_channel string,
 app_ver_name string,
 cnt int 
 )
 partitioned by (day string);




/* 历史用户表 */
create table etl_user_history(
user_id string);

3.分析活跃用户
在这里,将活跃用户简单的定义为在数据当天出现的用户,忽略用户出现的次数。

先找出所有的活跃用户,然后再根据不同的维度进行分析。

/* 抽取当日活跃用户信息  */
insert into app_db.etl_user_active_day partition(day='20170101')
select 
 user_id,
 city,
 promotion_channel,//推广渠道
 release_channel,//下载渠道
 app_ver_name
from ods_app_log
where day='20170101';
/ ** 维度组合逐一查询 **/
-- 区分城市
insert into table dim_city_user_active_d
partition(day='20170101')
select city,'all','all','all',count(1)
from 
(
select city,user_id
from app_db.etl_user_active_day
where day='20170101'
group by city,user_id) o1
group by city;


-- 区分版本
insert into table dim_city_user_active_d
partition(day='20170101')
select 'all','all','all',app_ver_name,count(1)
from 
(
select app_ver_name,user_id
from app_db.etl_user_active_day
where day='20170101'
group by app_ver_name,user_id) o1
group by app_ver_name;


-- 区分城市和版本

insert into table dim_city_user_active_d
partition(day='20170101')
select  city,'all','all',app_ver_name,count(1)
from
(select city,app_ver_name,user_id
from app_db.etl_user_active_day 
where day='20170101'
group by city,app_ver_name,user_id) o1 
group by city,app_ver_name;
........

也可以多重插入一个表中,设置flag,然后再根据不同的维度进行查询。

from etl_user_active_day

insert into table dim_user_active partition(day="20170101",flag="000")
select "all","all","all",count(1)  where day="20170101"

insert into table dim_user_active partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"
group by app_ver_name

insert into table dim_user_active partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"
group by release_channel

insert into table dim_user_active partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"
group by release_channel,app_ver_name

insert into table dim_user_active partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"
group by city

insert into table dim_user_active partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"
group by city,app_ver_name

insert into table dim_user_active partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"
group by city,release_channel

insert into table dim_user_active partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101"
group by city,release_channel,app_ver_name;

2.新增用户分析
引入一个历史用户表,将新增用户定义为历史用户表中没有的用户。

//将新增用户添加进新增用户表中
insert into table etl_user_new_day partition(day='20170101')
select 
 o1.uid
 o1.city,
 o1.release_channel,
 o1.app_ver_name
from 
(select 
 a.*,
 b.uid 
from etl_user_active_day a
left join 
etl_history_user b
on a.uid = b.uid
where a.day='20170101') o1
where b.uid is null;


from etl_user_active_day

insert into table dim_user_new partition(day="20170101",flag="000")
select "all","all","all",count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"

insert into table dim_user_new partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101";

这里都是简单的实现。

展开阅读全文

没有更多推荐了,返回首页