做题也是理解业务的一部分,通过这些题,逆向勾勒出了某音视频业务的部分业务逻辑和指标体系
提示:通过指标间关联,结合自己的理解逆向重构
输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-09-01 10:00:00', '2021-09-01 10:00:20', 0, 1, 1, null)
,(105, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 1, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 2, 0, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00')
,(2002, 901, '影视', 60, '2021-01-01 7:00:00')
,(2003, 902, '旅游', 90, '2020-01-01 7:00:00')
,(2004, 902, '美女', 90, '2020-01-01 8:00:00');
复制
输出:
901|2021-09|0.500|1
901|2021-10|0.250|2
1.各个视频的平均完播率
#完播率 = 播放次数/播放总次数 三位小数round,降序desc
#完成播放 = 观看时间-开始播放的时间>=视频时长
#完播 = end_time - start_time
#播放次数 = count(end_time - start_time)
select
a.video_id,
round(sum(if(end_time-start_time>=duration,1,0))/count(start_time),3) as avg_comp_play_rate
from
tb_user_video_log as a
left join
tb_video_info as b
on
a.video_id = b.video_id
where
a.start_time like '2021%'
group by
a.video_id
order by
avg_comp_play_rate desc;
2.平均播放进度大于60%的视频
#计算平均播放进度,大于60%输出
#播放进度=播放时长/视频时长*100%
#播放时长=结束播放时间-开始播放时间,播放时长大于duration,进度100%
#结果保留两位小数:round,2),播放进度倒序,desc
select tag,
concat(round(avg(
if(timestampdiff(second,start_time,end_time)>duration,1,
timestampdiff(second,start_time,end_time)/duration)
)*100,2),'%') as avg_play_progress
from
tb_user_video_log a
left join
tb_video_info b
on
a.video_id = b.video_id
group by
tag
having
avg_play_progress>60
order by
avg_play_progress desc
3.每类视频近一个月的转发量/率
#转发率 = 转发量/播放量 ,按转发率降序排序,保留三位小数
#最近一个月的数据:timestampdiff(month)
select
tag,
sum(if(timestampdiff(day,start_time,
(select max(start_time) from tb_user_video_log))<30,if_retweet,0)) as retweet_cut,
round(sum(if(timestampdiff(day,start_time,(select max(start_time) from tb_user_video_log))<30,
if_retweet,0))/count(start_time),3) as retweet_rate
from
tb_user_video_log a
left join
tb_video_info b
on
a.video_id = b.video_id
group by
tag
order by
retweet_rate desc
4.每个创作者每月的涨粉率及截止当前的总粉丝量
#涨粉率 = (涨粉量-掉粉量)/播放量
#2021每个创作者、每月的涨粉率及截至当月的总粉丝量
select
author,
#涨粉率
date_format(start_time,'%Y-%m') as month,
round(sum(case if_follow
when 1 then 1
when 2 then -1
else 0 end)/count(start_time),3) fans_growth_rate,
sum(sum(case if_follow
when 1 then 1
when 2 then -1
else 0 end)) over(partition by author order by date_format(start_time,'%Y-%m')) as total_fans
from
tb_user_video_log log
left join
tb_video_info info
on
log.video_id=info.video_id
where
year(start_time)=2021
group by
author,month
order by
author,total_fans