第一步:数据定义—建模(引用数据仓库的概念)
1.创建贴源数据表
create database app_db;//建库
use app_db;
create external table ods_app_log(
cid_sn string,
mobile_data_type string,
os_ver string,
mac string,
resolution string,
commit_time string,
sdk_ver string,
device_id_type string,
city string,
android_id string,
device_model string,
carrier string,
promotion_channel string,
app_ver_name string,
imei string,
app_ver_code string,
pid string,
net_type string,
device_id string,
app_device_id string,
release_channel string,
country string,
time_zone string,
os_name string,
manufacture string,
commit_id string,
app_token string,
account string,
app_id string,
build_num string,
language string,
user_id string)
partitioned by (day string)
row format delimited fields terminated by ',';
ps:这些字段都可以在源数据中得到。
2.建模
/* ETL:活跃用户信息表 */
-- user_id,时间,城市,推广渠道,下载渠道,版本,.
create table etl_user_active_day(
user_id string,
city string,
promotion_channel string,
release_channel string,
app_ver_name string)
partitioned by (day string);
/* ETL:新用户信息表 */
-- user_id,时间,城市,推广渠道,下载渠道,版本,.
create table etl_user_new_day(
user_id string,
city string,
promotion_channel string,
release_channel string,
app_ver_name string)
partitioned by (day string);
/* DIM:维度统计报表——日活数维度统计报表 */
create table dim_user_active_d(
city string,
promotion_channel string,
release_channel string,
app_ver_name string,
cnt int
)
partitioned by (day string);
/* DIM:维度统计报表——日新数维度统计报表 */
create table dim_user_new_d(
city string,
promotion_channel string,
release_channel string,
app_ver_name string,
cnt int
)
partitioned by (day string);
/* 历史用户表 */
create table etl_user_history(
user_id string);
3.分析活跃用户
在这里,将活跃用户简单的定义为在数据当天出现的用户,忽略用户出现的次数。
先找出所有的活跃用户,然后再根据不同的维度进行分析。
/* 抽取当日活跃用户信息 */
insert into app_db.etl_user_active_day partition(day='20170101')
select
user_id,
city,
promotion_channel,//推广渠道
release_channel,//下载渠道
app_ver_name
from ods_app_log
where day='20170101';
/ ** 维度组合逐一查询 **/
-- 区分城市
insert into table dim_city_user_active_d
partition(day='20170101')
select city,'all','all','all',count(1)
from
(
select city,user_id
from app_db.etl_user_active_day
where day='20170101'
group by city,user_id) o1
group by city;
-- 区分版本
insert into table dim_city_user_active_d
partition(day='20170101')
select 'all','all','all',app_ver_name,count(1)
from
(
select app_ver_name,user_id
from app_db.etl_user_active_day
where day='20170101'
group by app_ver_name,user_id) o1
group by app_ver_name;
-- 区分城市和版本
insert into table dim_city_user_active_d
partition(day='20170101')
select city,'all','all',app_ver_name,count(1)
from
(select city,app_ver_name,user_id
from app_db.etl_user_active_day
where day='20170101'
group by city,app_ver_name,user_id) o1
group by city,app_ver_name;
........
也可以多重插入一个表中,设置flag,然后再根据不同的维度进行查询。
from etl_user_active_day
insert into table dim_user_active partition(day="20170101",flag="000")
select "all","all","all",count(1) where day="20170101"
insert into table dim_user_active partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"
group by app_ver_name
insert into table dim_user_active partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"
group by release_channel
insert into table dim_user_active partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"
group by release_channel,app_ver_name
insert into table dim_user_active partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"
group by city
insert into table dim_user_active partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"
group by city,app_ver_name
insert into table dim_user_active partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"
group by city,release_channel
insert into table dim_user_active partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101"
group by city,release_channel,app_ver_name;
2.新增用户分析
引入一个历史用户表,将新增用户定义为历史用户表中没有的用户。
//将新增用户添加进新增用户表中
insert into table etl_user_new_day partition(day='20170101')
select
o1.uid
o1.city,
o1.release_channel,
o1.app_ver_name
from
(select
a.*,
b.uid
from etl_user_active_day a
left join
etl_history_user b
on a.uid = b.uid
where a.day='20170101') o1
where b.uid is null;
from etl_user_active_day
insert into table dim_user_new partition(day="20170101",flag="000")
select "all","all","all",count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="001")
select "all","all",app_ver_name,count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="010")
select "all",release_channel,"all",count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="011")
select "all",release_channel,app_ver_name,count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="100")
select city,"all","all",count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="101")
select city,"all",app_ver_name,count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="110")
select city,release_channel,"all",count(1) where day="20170101"
insert into table dim_user_new partition(day="20170101",flag="111")
select city,release_channel,app_ver_name,count(1) where day="20170101";
这里都是简单的实现。