hive电商项目:2用户主题

本文介绍了如何使用Hive进行电商项目的用户主题分析,包括数据导入、表设计、用户行为数据处理及统计,揭示用户购物习惯和兴趣偏好。
摘要由CSDN通过智能技术生成
创建dw层的数据
##为新增用户、总用户、活跃用户做计算:
create table if not exists dwd_user(
pl string,
pl_id string,
en string,
en_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
province_name string,
province_id string,
city_name string,
city_id string,
uid string
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

导入数据:
from (
select
ol.pl,
dp.id as pl_id,
ol.en,
de.id as en_id,
ol.browser_name,
db.id browser_id,
ol.browser_version ,
dv.id browser_version_id,
ol.province ,
pv.id province_id,
ol.city ,
dc.id city_id,
ol.u_ud
from ods.ods_logs_orc ol
left join dim.dim_platform dp on dp.platform_name = ol.pl
left join dim.dim_event_name de on de.name = ol.en
left join dim.dim_browser_name db on db.browser_name = ol.browser_name
left join dim.dim_browser_version dv on dv.browser_version = ol.browser_version
left join dim.dim_province pv on pv.province = ol.province
left join dim.dim_city dc on dc.city = ol.city
) tmp
insert into dwd_user partition (month='1',day = '01')
select *
;

-------------------------------

创建dm层的数据:
#用户主题下新增用户、新增总用户、日活跃用户
create table if not exists dm_user_users(
pl string,
pl_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

导入数据:
from (
select 
us.pl,
us.pl_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(uu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_user_users uu on uu.month = '1' and uu.day = '00' and us.pl_id = uu.pl_id
where us.month = 1 and us.day = '01' and us.en_id = 1
group by us.pl,us.pl_id,uu.new_total_user_count
union all select 
us.pl,
us.pl_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id)tmp

insert into dm_user_users partition (month = 1,day ='01')

select tmp.pl,tmp.pl_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count

group by tmp.pl,tmp.pl_id
;
注:dwd_user.en_id = 1 表示lunch事件
输出:
dm_user_users.pl        dm_user_users.pl_id     dm_user_users.new_user_count    dm_user_users.new_total_user_count  dm_user_users.active_user_count  dm_user_users.month     dm_user_users.day
java    NULL    0       0       1       1       01
java_server     1       0       0       2       1       01
website 2       5       5       8       1       01


#浏览器模块下的新增用户、新增总用户、日活跃用户
create table if not exists dm_browser_users(
pl string,
pl_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
#分析:平台和浏览器有关,因为有平台相关字段
#browser_version_id和浏览器类型无关
from (
select 
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(bu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_browser_users bu on bu.month = '1' and bu.day = '00' and bu.pl_id=us.pl_id and 
bu.browser_id = us.browser_id and 
bu.browser_version_id = us.browser_version_id 
where us.month = 1 and us.day = '01' and us.en_id =1
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id,bu.new_total_user_count
union all select 
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id)tmp
insert into dm_browser_users partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.browser_name,
tmp.browser_id,
tmp.browser_version,
tmp.browser_version_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count
group by tmp.pl,tmp.pl_id,tmp.browser_name,tmp.browser_id,tmp.browser_version,tmp.browser_version_id;
输出:
dm_browser_users.pl     dm_browser_users.pl_id  dm_browser_users.browser_name   dm_browser_users.browser_id     dm_browser_users.browser_version     dm_browser_users.browser_version_id     dm_browser_users.new_user_count dm_browser_us                                     ers.new_total_user_count        dm_browser_users.active_user_count      dm_browser_users.month  dm_browser_users.day
java    NULL    Chrome  4       31.0.1650.63    4       0       0       1       1       01
java_server     1       Chrome  4       31.0.1650.63    4       0       0       1       1       01
java_server     1       null    3       null    3       0       0       1       1       01
website 2       Chrome  4       31.0.1650.63    4       0       0       1       1       01
website 2       Chrome  4       47.0.2526.106   NULL    1       1       1       1       01
website 2       Chrome  4       70.0.3538.77    NULL    1       1       2       1       01
website 2       Firefox NULL    63.0    NULL    0       0       1       1       01
website 2       IE      1       8.0     1       2       2       2       1       01
website 2       Sogou Explorer  2       2.X     2       1       1       1       1       01
Time taken: 0.177 seconds, Fetched: 9 row(s)

#地域主题下的活跃用户
create table if not exists dm_area_users(
pl string,
pl_id string,
province string,
province_id string,
city string,
city_id string,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
from (
select 
us.pl,
us.pl_id,
us.province_name,
us.province_id,
us.city_name,
us.city_id,
count(distinct us.uid) as active_user
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.province_name,us.province_id,us.city_name,us.city_id)tmp
insert into dm_area_users partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.province_name,
tmp.province_id,
tmp.city_name,
tmp.city_id,
tmp.active_user
group by tmp.pl,tmp.pl_id,tmp.province_name,tmp.province_id,tmp.city_name,tmp.city_id,tmp.active_user;

输出:
hive (dm)> select * from dm_area_users;
OK
dm_area_users.pl        dm_area_users.pl_id     dm_area_users.province  dm_area_users.province_id       dm_area_users.city   dm_area_users.city_id   dm_area_users.active_user_count dm_area_users.month     dm_area_users.day
java    NULL    北京市  2002    昌平区  NULL    1       1       01
java_server     1       广西省  NULL    广西南宁市      NULL    1       1       01
java_server     1       贵州省  2025    贵阳市  4266    1       1       01
website 2       北京市  2002    昌平区  NULL    4       1       01
website 2       广西省  NULL    广西南宁市      NULL    1       1       01
website 2       河南省  2017    平顶山市        4109    1       1       01
website 2       贵州省  2025    贵阳市  4266    2       1       01
website 2       贵州省  2025    黔西南州兴义市  NULL    2       1       01

 

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值