背景:
最近有接到业务上的这么个需求,每天计算圈定用户的一些标签,同时把新增和移除标签的信息及当前用户的信息发给下游,举例:
这个用户昨日新增了tag3和tag2这个标签,同时当前codes也是这2个
任务设计:
首先,存储每天的全量的计算结果,因为新增和减少都需要和之前的数据作比较才知道,同时全部的codes用全量的结果
然后,用昨天的数据和前天的数据full join下,这样形成一个新增和减少的关系表
最后,按照指定格式取出来新增和减少的code及当前的codes值
帖下最后一步的处理逻辑:
insert overwrite table ba_user_push partition (dt)
select concat(
'{"add":['
,if(add_code is null,'',add_code)
,'],"client_str":'
,stringaddquotation(t1.client_str)
,',"registration_id":'
,stringaddquotation(t1.registration_id)
,',"codes":'
,if(all_code is null,'""',all_code)
,',"remove":['
,if(remove_code is null,'',remove_code)
,']}'
) as message
from (
select if(client_str1 is null,client_str2,client_str1) as client_str
,if(registration_id1 is null,registration_id2,registration_id1) as registration_id
,if(hupu_uid1 is null,hupu_uid2,hupu_uid1) as hupu_uid
from tmp_push_step1
where dt = ${bdp.system.bizdate}
and (registration_id1 is null --有减少
or registration_id2 is null) --有新增 只处理这2部分
group by if(client_str1 is null,client_str2,client_str1)
,if(registration_id1 is null,registration_id2,registration_id1)
) t1 left
join (
select client_str1 as client_str
,registration_id1 as registration_id
,stringaddquotation(wm_concat(',',code1)) as all_code
from bigdata2c.tmp_shihuo_push_step1
where dt = ${bdp.system.bizdate}
and client_str1 is not null
group by client_str1
,registration_id1
) t2
on t1.client_str = t2.client_str
and t1.registration_id = t2.registration_id left
join (
select client_str1 as client_str
,registration_id1 as registration_id
,wm_concat(',',stringaddquotation(code1)) as add_code
from bigdata2c.tmp_shihuo_push_step1
where dt = ${bdp.system.bizdate}
and registration_id2 is null
group by client_str1
,registration_id1
) t3
on t1.client_str = t3.client_str
and t1.registration_id = t3.registration_id left
join (
select client_str2 as client_str
,registration_id2 as registration_id
,wm_concat(',',stringaddquotation(code2)) as remove_code
from bigdata2c.tmp_shihuo_push_step1
where dt = ${bdp.system.bizdate}
and registration_id1 is null
group by client_str2
,registration_id2
) t4
on t1.client_str = t4.client_str
and t1.registration_id = t4.registration_id
;