CREATE TABLE IF NOT EXISTS recommend.ml_honey_feature
(
momo_id STRING COMMENT '用户ID'
,videoid STRING COMMENT '视频ID'
,score INT COMMENT '得分'
)
PARTITIONED BY (partition_date STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS ORC;
INSERT OVERWRITE TABLE recommend.ml_honey_feature PARTITION(partition_date='20171110') --${hivevar:partition_date}
SELECT momo_id, videoid, score
FROM(
SELECT momo_id, videoid, IF(score1=null,0,score1)+score0 as score
FROM(
SELECT
T_1.momo_id0 AS momo_id
,T_1.videoid0 AS videoid
,T_1.view_score as score0
,T_2.score1 as score1
FROM(
SELECT
momo_id as momo_id0
,videoid as videoid0
,full_duration
,view_duration
,IF(view_duration*1.0/(full_duration)>0.5,1,0) AS view_score
FROM
(
select
momo_id
,videoid
,cast(full_duration as int) as full_duration
,cast(view_duration as int) as view_duration
from
(
select
momo_id
,from_json(video_json, 'map<string,string>')['videoid'] as videoid
,from_json(video_json, 'map<string,string>')['full_duration'] as full_duration
,from_json(video_json, 'map<string,string>')['view_duration'] as view_duration
from
(
SELECT
momo_id,
from_json(access_event_map['log_json'], 'array<string>') as lists
FROM online.bl_app_event_detail
WHERE partition_date='20171110'
and partition_business = 'honey'
and event_name ='honey.log.common.view'
) t1
lateral view explode(t1.lists)e as video_json
) t2
where videoid is not null
AND full_duration is not null
AND view_duration is not null
) t3
where IF(view_duration*1.0/(full_duration)>0.5,1,0)>0
) T_1
LEFT JOIN
(
SELECT
momo_id as momo_id1
,videoid as videoid1
,Like2
,comment2
,share2
,like2*4+comment2*10+share2*2 AS score1
FROM(
SELECT
momo_id
,videoid
,SUM(like1) as like2
,SUM(comment1) as comment2
,SUM(share1) as share2
FROM(
SELECT
momo_id
,access_event_map['videoid'] as videoid
,IF(event_name='honey.video.like.set',1, 0) AS like1
,IF(event_name='honey.video.comment.pub', 1, 0) AS comment1
,IF(event_name='honey.share.video.index',1, 0) AS share1
FROM online.bl_app_event_detail
WHERE datediff(CONCAT_WS('-', SUBSTR(partition_date, 1, 4), SUBSTR(partition_date, 5, 2), SUBSTR(partition_date, 7, 2)), CONCAT_WS('-', SUBSTR('20171110', 1, 4), SUBSTR('20171110', 5, 2), SUBSTR('20171110', 7, 2))) <= 0
AND datediff(CONCAT_WS('-', SUBSTR(partition_date, 1, 4), SUBSTR(partition_date, 5, 2), SUBSTR(partition_date, 7, 2)), CONCAT_WS('-', SUBSTR('20171110', 1, 4), SUBSTR('20171110', 5, 2), SUBSTR('20171110', 7, 2))) >= -29
AND partition_business='honey'
)T
GROUP BY momo_id,videoid
)T_A
WHERE like2>0 OR comment2>0 OR share2>0
) T_2
ON (T_1.momo_id0=T_2.momo_id1 and T_1.videoid0=T_2.videoid1)
) T_3
) T_4
WHERE score is not null
;
SQL
最新推荐文章于 2021-01-31 22:27:40 发布