Hive 交互事件分析

DWD_APP_GLB_DTL_DEMO

create table DWD_APP_GLB_DTL_DEMO
(
guid bigint,
eventid String,
event Map<String, String>,
uid String,
imei String,
mac String,
imsi String,
osName String,
osVer String,
androidId String,
resolution String,
deviceType String,
deviceId String,
uuid String,
appid String,
appVer String,
release_ch String,
promotion_ch String,
areacode String,
longtitude Double,
latitude Double,
carrier String,
netType String,
cid_sn String,
ip String,
sessionId String,
`timestamp` bigint,
province String,
city String,
district String,
year string,
month string,
day string,
datestr string
)
partitioned by (dt string);

数据:

链接:https://pan.baidu.com/s/1ALhcteGc5077zjZEBNGc2Q 
提取码:0g2h 

导入:

#数据导入
hive>load data local inpath '/root/tmp_data/data/file.txt'  into table DWD_APP_GLB_DTL_DEMO partition(dt='2023-03-01'); 

#数据导出
hive>insert overwrite local directory '/root/tmp_data/data/table' select * from  DWD_APP_GLB_DTL_DEMO;

#数据导出 指定分割符‘,’
hive>insert overwrite local directory '/root/tmp_data/data/table' row format delimited fields terminated by ',' select * from DWD_APP_GLB_DTL_DEMO;

DWD全局事件明细表

hive>create table DWD_APP_ITR_DTL_DEMO(
guid bigint,
eventid String,
event Map<String, String>,
uid String,
imei String,
mac String,
imsi String,
osName String,
osVer String,
androidId String,
resolution String,
deviceType String,
deviceId String,
uuid String,
appid String,
appVer String,
release_ch String,
promotion_ch String,
areacode String,
longtitude Double,
latitude Double,
carrier String,
netType String,
cid_sn String,
ip String,
sessionId  String,
`timestamp` bigint,
province String,
city String,
district String,
year string,
month string,
day string,
datestr string
)
partitioned by (dt string)
stored as orc;

源表,全局事件明细表
计算逻辑:过滤掉

hive>insert into table DWD_APP_ITR_DTL_DEMO partition(dt='2023-03-01')
select
guid,
eventid,
event,
uid,
imei,
mac,
imsi,
osName,
osVer,
androidId,
resolution,
deviceType,
deviceId,
uuid,
appid,
appVer,
release_ch,
promotion_ch,
areacode,
longtitude,
latitude,
carrier,
netType,
cid_sn,
ip,
sessionId,
`timestamp`,
province,
city,
district,
year,
month,
day,
datestr
from DWD_APP_GLB_DTL_DEMO
where dt='2023-03-01'
and eventid not in ('adClickEvent','adShowEvent','pgviewEvent');

事件次数,会话聚合表计算
建表
粒度 一个人,一个会话,一种事件 的汇总数

hive>create table DWS_APP_ITR_AGS_DEMO(
guid bigint,
sessionId string,
eventid string,
event_counts int,
province string,
city string,
district string,
osName string,
osVer string,
appVer string,
release_ch string
)
partitioned by (dt string)
stored as orc;

计算
分组聚合、求次数

hive>insert into DWS_APP_ITR_AGS_DEMO partition(dt='2023-03-01')
select
guid,
sessionId,
eventid,
count(1) as event_counts,
max(province) as province,
max(city) as city,
max(district) as district,
max(osName) as osName,
max(osVer) as osVer,
max(appVer) as appVer,
max(release_ch) as release_ch
from DWD_APP_ITR_DTL_DEMO
where dt='2023-03-01'
group by guid,sessionId,eventid;

事件次数,人员聚合表计算
建表
粒度 一个人,一种事件 的汇总数

hive>create table DWS_APP_ITR_AGU_DEMO(
guid bigint,
eventid string,
event_counts int,
province string,
city string,
district string,
osName string,
osVer string,
appVer string,
release_ch string
)
partitioned by (dt string)
stored as orc;

计算 ADS_APP_ITR_AGS_DEMO
分组聚合、求和

hive>insert into DWS_APP_ITR_AGU_DEMO partition(dt='2023-03-01')
select
guid,
eventid,
sum(event_counts) as event_counts,
max(province) as province,
max(city) as city,
max(district) as district,
max(osName) as osName,
max(osVer) as osVer,
max(appVer) as appVer,
max(release_ch) as release_ch
from DWS_APP_ITR_AGS_DEMO
where dt='2023-03-01'
group by guid,eventid;

交互事件多维分析报表

hive>create table ADS_APP_ITR_CUBE_DEMO(
eventid string,
event_counts bigint,
user_counts bigint,
province string,
city string,
district string,
osName string,
osVer string,
appVer string,
release_ch string
)
partitioned by (dt string)
stored as orc;

计算:

hive>insert into table ADS_APP_ITR_CUBE_DEMO partition(dt='2023-03-01')
select
eventid,
sum(event_counts) as event_counts,
count(1) as user_counts,
province,
city,
district,
osName,
osVer,
appVer,
release_ch
from DWS_APP_ITR_AGU_DEMO
group by eventid,province,city,district,osName,osVer,appVer,release_ch
grouping sets(
(eventid),
(eventid,province),
(eventid,province,city),
(eventid,province,city,district),
(eventid,osName),
(eventid,osName,osVer),
(eventid,appVer),
(eventid,release_ch)
);

 交互事件top3用户记录表 典型的topN

hive>create table ADS_APP_ITR_TOP3_USER(
eventid string,
guid bigint,
event_counts int,
rank_num int
)
partitioned by (dt string)
stored as orc;

源表  交互事件人员聚合表
计算:row_number  事件分区,事件数排序 desc  取 前3

hive>with temp as(
select
eventid,
guid,
event_counts,
row_number() over(partition by eventid order by event_counts desc) as rn
from DWS_APP_ITR_AGU_DEMO where dt='2023-03-01'
)
insert into table ADS_APP_ITR_TOP3_USER partition(dt='2023-03-01')
select
eventid,
guid,
event_counts,
rn as rank_num
from temp
where rn <=3;

报错:Error: Error while compiling statement: FAILED: SemanticException [Error 10016]: Line 15:6 Argument type mismatch 'rn': The 1st argument of EQUAL OR LESS THAN  is expected to a primitive type, but list is
 found (state=42000,code=10016)

原因:hive版本过低 

hive>show version();
+---------------------------+--+
|            _c0            |
+---------------------------+--+
| 1.1.0-cdh5.14.2 rUnknown  |
+---------------------------+--+

解决办法:升级hive版本

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值