HiveQL如何统计用户近一段时间活跃标签

create table if not exists schema_name.table_name
(
     user_id                            string comment '用户id'
    ,country                            string comment '国家'
    ,magic_ui_version                   string comment 'magic版本'
    ,device_type                        string comment '机型'
    ,starting_source                    string comment '启动来源'
    ,today_active_flg                   string comment '今日活跃标识(1活跃0:未活跃)'
    ,first_event_time                   string comment '用户首次活跃时间'
    ,last_event_time                    string comment '用户末次活跃时间'
    ,user_active_365_flg                string comment '用户365活跃标志'
    ,active_retention_2d_flg            string comment '活跃用户次日留存标识'
)comment '用户365活跃日表'
partitioned by (pt_d  string  comment '天分区')
row format delimited
fields terminated by '\001'
lines terminated by '\n'
stored as orc
;

insert overwrite table schema_name.table_name
partition (pt_d = '${hiveconf:pt_d}')
select
     user_id
    ,country
    ,magic_ui_version
    ,device_type
    ,starting_source
    ,today_active_flg
    ,first_event_time
    ,last_event_time
    ,user_active_365_flg
    ,if(substring(user_active_365_flg,2,1)='1',1,0)              as active_retention_2d_flg
from
(
    select
         user_id
        ,country
        ,magic_ui_version
        ,device_type
        ,starting_source
        ,max(today_active_flg)                                                              as today_active_flg
        ,min(first_event_time)                                                              as first_event_time
        ,max(last_event_time)                                                               as last_event_time
        ,concat(max(today_active_flg),substr(max(user_active_365_flg),1,364))               as user_active_365_flg
    from
    (
        select
             user_id
            ,country
            ,magic_ui_version
            ,device_type
            ,starting_source
            ,'1'                                                                            as today_active_flg
            ,rpad('',365,'0')                                                               as user_active_365_flg
            ,'${hiveconf:pt_d}'                                                             as first_event_time
            ,'${hiveconf:pt_d}'                                                             as last_event_time
        from table_name_inc_d--增量表
        where pt_d='${hiveconf:pt_d}'
        group by
             user_id
            ,country
            ,magic_ui_version
            ,device_type
            ,starting_source
 
        union all
    
        select
             user_id
            ,country
            ,magic_ui_version
            ,device_type
            ,starting_source
            ,'0'                                                    as today_active_flg
            ,user_active_365_flg
            ,first_event_time
            ,last_event_time
        from table_name_all_d--全量表
        where pt_d='${hiveconf:last_day}'
    )t1
    group by
         user_id
        ,country
        ,magic_ui_version
        ,device_type
        ,starting_source
)t2
;

通过设计一张全量数据累积模型,给用户打上活跃标签。可基于用户活跃标签数据模型,有效提高用户活跃相关标签计算性能

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值