Hive 广告事件分析

数据源:ODS_APP_LOG_DEMO

数据文件:
链接:https://pan.baidu.com/s/1s1gytjzZXbWRkWJYUaADTA 
提取码:oxmu 
 
解压后将文件夹内的数据放到hdfs上:hdfs dfs -put <本地路径> <hdfs目标路径>(也可写作 hadoop fs -put,注意不存在 "hadoop hdfs -put" 这种写法)

-- Raw app-event log (ODS layer): one row per client event, partitioned by day.
-- `if not exists` makes the DDL safe to re-run.
-- NOTE(review): `longtitude` is misspelled but kept as-is — renaming would break
-- existing readers/writers of this schema; confirm before fixing.
-- NOTE(review): table is STORED AS PARQUET, and `load data inpath` moves files
-- without converting them — the source files must already be parquet; verify.
hive>create external table if not exists ODS_APP_LOG_DEMO
(
guid         bigint,               -- global unique user id
eventid      string,               -- event type, e.g. adShowEvent / adClickEvent
event        map<string, string>,  -- event payload, e.g. keys adId / pgId
uid          string,
imei         string,
mac          string,
imsi         string,
osName       string,
osVer        string,
androidId    string,
resolution   string,
deviceType   string,
deviceId     string,
uuid         string,
appid        string,
appVer       string,
release_ch   string,               -- release channel
promotion_ch string,               -- promotion channel
areacode     string,
longtitude   double,               -- sic: longitude
latitude     double,
carrier      string,
netType      string,
cid_sn       string,
ip           string,
sessionId    string,
`timestamp`  bigint,               -- reserved word, must stay backquoted
province     string,
city         string,
district     string
)
partitioned by (dt string)
stored as parquet;
 
hive>load data inpath '/mycluster/user/applogs' into table ODS_APP_LOG_DEMO partition(dt='2023-03-01');  //文件在hdfs上
 
-- Sanity checks after the load: inspect table metadata, then count loaded rows.
hive> desc formatted ODS_APP_LOG_DEMO;
hive> select count(*) from ODS_APP_LOG_DEMO;

分析代码:

-- Flatten the event map: pull the page id and ad id out of each event row
-- for the target day's partition.
hive>select
    guid,
    eventid,
    event['pgId'] as page_id,
    event['adId'] as ad_id
from ODS_APP_LOG_DEMO
where dt = '2023-03-01';

广告id,曝光次数,曝光人数,曝光人数最大页,点击次数,点击人数,点击人数最大页。

-- Target table (ADS layer) for the per-ad daily overview report.
-- `if not exists` makes the DDL safe to re-run.
hive>create table if not exists ADS_APP_ADV_OVW_DEMO(
dt string,              -- report date
adid string,            -- ad id
show_counts int,        -- impression events
show_users int,         -- distinct users with at least one impression
show_max_page string,   -- page id with the most distinct impression users
click_counts int,       -- click events
click_users int,        -- distinct users with at least one click
click_max_page string   -- page id with the most distinct click users
)
stored as orc;

--广告id,曝光次数,曝光人数,点击次数,点击人数

-- Per-ad overview: impression/click event counts and distinct-user counts.
-- count(if(cond,1,null)) counts only matching rows; count(distinct if(...))
-- counts distinct guids among matching rows (aggregates ignore NULLs).
-- Fix: rows whose event map has no 'adId' key (non-ad events) previously fell
-- into a meaningless NULL group with all-zero counts (see the NULL row in the
-- result table below); filter them out before grouping.
hive>select
'2023-03-01' as dt,
event['adId'] as adid,
count(if(eventid='adShowEvent',1,null)) as show_counts,
count(distinct if(eventid='adShowEvent',guid,null)) as show_users,
count(if(eventid='adClickEvent',1,null)) as click_counts,
count(distinct if(eventid='adClickEvent',guid,null)) as click_users
from ODS_APP_LOG_DEMO
where dt='2023-03-01'
  and event['adId'] is not null
group by event['adId'];

结果:
+-------------+-------+--------------+-------------+---------------+--------------+--+
|     dt      | adid  | show_counts  | show_users  | click_counts  | click_users  |
+-------------+-------+--------------+-------------+---------------+--------------+--+
| 2023-03-01  | NULL  | 0            | 0           | 0             | 0            |
| 2023-03-01  | 0     | 2367         | 209         | 483           | 151          |
| 2023-03-01  | 1     | 2493         | 212         | 333           | 141          |
| 2023-03-01  | 2     | 2466         | 211         | 283           | 133          |
| 2023-03-01  | 3     | 2441         | 209         | 194           | 110          |
| 2023-03-01  | 4     | 2436         | 212         | 158           | 91           |
| 2023-03-01  | 5     | 2514         | 212         | 110           | 80           |
| 2023-03-01  | 6     | 2480         | 209         | 86            | 65           |
| 2023-03-01  | 7     | 2429         | 210         | 64            | 49           |
| 2023-03-01  | 8     | 2467         | 211         | 27            | 22           |
| 2023-03-01  | 9     | 2403         | 213         | 22            | 21           |
+-------------+-------+--------------+-------------+---------------+--------------+--+

广告id,曝光人数最大页,点击人数最大页。

-- For each (ad, event type): the two pages with the most distinct users.
hive>with page_users as (
    -- distinct users per (ad, page, event type)
    select
        event['adId'] as adid,
        event['pgId'] as pgid,
        eventid,
        count(distinct guid) as user_counts
    from ODS_APP_LOG_DEMO
    where dt = '2023-03-01'
    group by event['adId'], event['pgId'], eventid
),
ranked as (
    -- rank pages inside each (ad, event type) group by distinct users, desc
    select
        adid,
        pgid,
        eventid,
        user_counts,
        row_number() over (partition by adid, eventid order by user_counts desc) as rn
    from page_users
)
select
    adid,
    pgid,
    eventid
from ranked
where rn <= 2;
点击最大页和曝光最大页的前两名

最终完整代码:

-- Target table for the combined per-ad report.
-- `if not exists` makes the DDL safe to re-run.
-- NOTE(review): this schema does not match the final query below — the query
-- emits no per-row pgid, and its column order differs; also it duplicates
-- ADS_APP_ADV_OVW_DEMO with a reshuffled layout. Confirm which schema is
-- intended before wiring up an `insert into`.
hive>create table if not exists ADS_APP_ADV_OVM_DEMO(
dt string,              -- report date
pgid string,            -- page id (unused by the final query; see note above)
adid string,            -- ad id
show_counts int,        -- impression events
click_counts int,       -- click events
show_users int,         -- distinct users with an impression
click_users int,        -- distinct users with a click
click_max_page string,  -- page id with the most distinct click users
show_max_page string    -- page id with the most distinct impression users
)
stored as orc;

-- Final combined report: per-ad counts plus the single page with the most
-- distinct users for impressions (show_max_page) and clicks (click_max_page).
-- Fixes vs. the original:
--   * the final select took three.user_counts / four.user_counts (numbers)
--     instead of the page ids, and gave them no aliases — now selects pgid
--     as show_max_page / click_max_page;
--   * inner joins silently dropped any ad with zero clicks (or zero shows) —
--     now left joins, so such ads appear with a NULL max page;
--   * `click_user` renamed to `click_users` for consistency with the other
--     CTE columns and the ADS table;
--   * `dt` carried through to the output;
--   * NULL-adid (non-ad) events filtered out before grouping.
with one as (
    -- per-ad impression/click event counts and distinct-user counts
    select
        '2023-03-01' as dt,
        event['adId'] as adid,
        count(if(eventid='adShowEvent',1,null)) as show_counts,
        count(distinct if(eventid='adShowEvent',guid,null)) as show_users,
        count(if(eventid='adClickEvent',1,null)) as click_counts,
        count(distinct if(eventid='adClickEvent',guid,null)) as click_users
    from ODS_APP_LOG_DEMO
    where dt='2023-03-01' and event['adId'] is not null
    group by event['adId']
),
temp as (
    -- distinct users per (ad, page, event type)
    select
        event['adId'] as adid,
        event['pgId'] as pgid,
        eventid,
        count(distinct guid) as user_counts
    from ODS_APP_LOG_DEMO
    where dt='2023-03-01' and event['adId'] is not null
    group by event['adId'], event['pgId'], eventid
),
result as (
    -- rank pages inside each (ad, event type) group by distinct users, desc
    select
        adid,
        pgid,
        eventid,
        user_counts,
        row_number() over (partition by adid, eventid order by user_counts desc) as rn
    from temp
),
two as (
    -- keep only the top page per (ad, event type)
    select adid, pgid, eventid
    from result
    where rn = 1
)
select
    one.dt,
    one.adid,
    one.show_counts,
    one.show_users,
    one.click_counts,
    one.click_users,
    shw.pgid as show_max_page,
    clk.pgid as click_max_page
from one
left join (select adid, pgid from two where eventid='adShowEvent') shw
    on one.adid = shw.adid
left join (select adid, pgid from two where eventid='adClickEvent') clk
    on one.adid = clk.adid;

报错:Error: Error while compiling statement: FAILED: SemanticException [Error 10016]: Line 15:6 Argument type mismatch 'rn': The 1st argument of EQUAL OR LESS THAN  is expected to a primitive type, but list is
 found (state=42000,code=10016)

原因:Hive 版本过低(1.1.0-cdh5.14.2)。旧版 Hive 的语义分析器在这种「同一 CTE 被多次引用、且 WHERE 中过滤 row_number() 结果列」的写法上存在缺陷,会把 rn 误判为 list 类型而报 Argument type mismatch。

hive>show version();
+---------------------------+--+
|            _c0            |
+---------------------------+--+
| 1.1.0-cdh5.14.2 rUnknown  |
+---------------------------+--+

解决办法:升级 Hive 到 2.x 及以上版本;若暂时无法升级,可把被多次引用的 CTE 改写为嵌套子查询(每处引用各写一份)以绕过该缺陷。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值