SQL156 各个视频的平均完播率
问题:计算2021年里有播放记录的每个视频的完播率(结果保留三位小数),并按完播率降序排序
注:视频完播率是指完成播放次数占总播放次数的比例。简单起见,结束观看时间与开始播放时间的差>=视频时长时,视为完成播放
SELECT
video_id,
ROUND(comp_play_cnt/total_play_cnt,3) AS avg_comp_play_rate #结果保留三位小数
FROM
(
SELECT
video_id,
SUM(IF(play_time>=duration,1,0)) AS comp_play_cnt,
COUNT(start_time) AS total_play_cnt
FROM
(
SELECT
video_id,
duration,
start_time,
TIMESTAMPDIFF(SECOND,start_time,end_time) AS play_time
FROM tb_user_video_log LEFT JOIN tb_video_info USING(video_id)
WHERE YEAR(start_time)=2021 #计算2021年里有播放记录的每个视频的完播率
) AS T1
GROUP BY video_id
) AS T2
ORDER BY avg_comp_play_rate DESC #并按完播率降序排序
SQL157 平均播放进度大于60%的视频类别
问题:计算各类视频的平均播放进度,将进度大于60%的类别输出
注:
播放进度=播放时长÷视频时长*100%,当播放时长大于视频时长时,播放进度均记为100%
结果保留两位小数,并按播放进度倒序排序
SELECT
tag,
CONCAT(play_progress,'%') AS avg_play_progress
FROM
(
SELECT
tag,
ROUND(AVG(CASE WHEN TIMESTAMPDIFF(SECOND, start_time, end_time)<=duration
THEN TIMESTAMPDIFF(SECOND, start_time, end_time)/duration ELSE 1 END)*100,2) #结果保留两位小数
AS play_progress
FROM tb_user_video_log
JOIN tb_video_info USING(video_id)
GROUP BY tag
HAVING play_progress>60 #将进度大于60%的类别输出
ORDER BY play_progress DESC #按播放进度倒序排序
) AS T
SQL158 每类视频近一个月的转发量/率
问题:统计在有用户互动的最近一个月(按包含当天在内的近30天算,比如10月31日的近30天为10.2~10.31之间的数据)中,每类视频的转发量和转发率(保留3位小数)
注:转发率=转发量÷播放量,结果按转发率降序排序
SELECT
tag,
retweet_cut,
ROUND(retweet_cut/play_cnt,3) AS retweet_rate
FROM
(
SELECT
tag,
COUNT(uid) AS play_cnt,
SUM(if_retweet) AS retweet_cut
FROM
(
SELECT
uid,
tag,
if_retweet,
end_time
FROM tb_user_video_log JOIN tb_video_info USING(video_id)
) AS T1
WHERE TIMESTAMPDIFF(DAY,end_time,(SELECT MAX(end_time) FROM tb_user_video_log))<30
GROUP BY tag
) AS T2
ORDER BY retweet_rate DESC #结果按转发率降序排序
SQL159 每个创作者每月的涨粉率及截止当前的总粉丝量
问题:计算2021年里每个创作者每月的涨粉率及截止当月的总粉丝量
注:
涨粉率=(加粉量 - 掉粉量) / 播放量。结果按创作者ID、总粉丝量升序排序
if_follow-是否关注为1表示用户观看视频中关注了视频创作者,为0表示此次互动前后关注状态未发生变化,为2表示本次观看过程中取消了关注
SELECT
author,
month,
fans_growth_rate,
SUM(month_fans) OVER (PARTITION BY author ORDER BY month) AS total_fans
FROM
(
SELECT
author,
DATE_FORMAT(start_time,'%Y-%m') AS month,
ROUND(SUM(IF(if_follow=2,-1,if_follow))/COUNT(if_follow),3) AS fans_growth_rate,
SUM(IF(if_follow=2,-1,if_follow)) AS month_fans
FROM tb_user_video_log JOIN tb_video_info USING(video_id)
WHERE YEAR(start_time)=2021
GROUP BY author,month
) AS t
ORDER BY author,total_fans
SQL160 国庆期间每类视频点赞量和转发量
问题:统计2021年国庆头3天每类视频每天的近一周总点赞量和一周内最大单天转发量,结果按视频类别降序、日期升序排序。假设数据库中数据足够多,至少每个类别下国庆头3天及之前一周的每天都有播放记录
WITH DAY_TABLE AS(
SELECT
tag,
DATE(start_time) AS day_Z,
SUM(if_like) AS sum_like_cnt,
SUM(if_retweet) AS retweet_cnt
FROM tb_user_video_log LEFT JOIN tb_video_info USING(video_id)
GROUP BY tag,day_Z
)
SELECT
A.tag,
A.day_Z AS dt,
SUM(B.sum_like_cnt ) AS sum_like_cnt_7d,
MAX(B.retweet_cnt ) AS max_retweet_cnt_7d
FROM DAY_TABLE A JOIN DAY_TABLE B USING(tag)
WHERE A.day_Z BETWEEN '2021-10-01' AND '2021-10-03'
AND TIMESTAMPDIFF(DAY,B.day_Z,A.day_Z)<7
AND A.day_Z>=B.day_Z
GROUP BY tag,dt
ORDER BY tag DESC,dt
SQL161 近一个月发布的视频中热度最高的top3视频
问题:找出近一个月发布的视频中热度最高的top3视频
热度=(a*视频完播率+b*点赞数+c*评论数+d*转发数)*新鲜度;
新鲜度=1/(最近无播放天数+1);
当前配置的参数a,b,c,d分别为100、5、3、2。
最近播放日期以end_time-结束观看时间为准,假设为T,则最近一个月按[T-29, T]闭区间统计
结果中热度保留为整数,并按热度降序排序
SELECT
video_id,
ROUND((100*complete_rate+5*like_cnt+3*comm_cnt+2*ret_cnt)/(TIMESTAMPDIFF(DAY,recently_day,to_day)+1),0) AS hot_index
FROM
(
SELECT
video_id,
SUM(IF(TIMESTAMPDIFF(SECOND,start_time,end_time)>=duration,1,0))/COUNT(start_time) AS complete_rate,
SUM(if_like) AS like_cnt,
COUNT(comment_id) AS comm_cnt,
SUM(if_retweet) AS ret_cnt,
MAX(DATE(end_time)) AS recently_day,
MAX(DATE(release_time)) AS release_day,
MAX(to_day) AS to_day
FROM tb_user_video_log
JOIN tb_video_info USING(video_id)
LEFT JOIN (SELECT MAX(DATE(end_time)) AS to_day FROM tb_user_video_log) AS T1 ON to_day
GROUP BY video_id
HAVING TIMESTAMPDIFF(DAY,release_day,to_day)<30
) AS T2
ORDER BY hot_index DESC
LIMIT 3