文章目录
一、某音短视频
1.各个视频的平均完播率
题目:计算2021年里有播放记录的每个视频的完播率(结果保留三位小数),并按完播率降序排序
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
(101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, null),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2021-01-01 7:00:00');
select
video_id
,round(sum(if(diff>=duration,1,0))/count(diff),3) avg_comp_play_rate # 视频完播率=完成播放次数/总播放次数
from (
select
a.video_id # 每个视频
,timestampdiff(second,a.start_time,a.end_time) diff # 结束观看时间与开始播放时间的
,b.duration # 视频时长
from (
select
video_id
,start_time
,end_time
from tb_user_video_log
) a left join ( # 有播放记录(用左连接)
select
video_id
,duration
from tb_video_info
) b on a.video_id=b.video_id
where year(start_time)='2021' # 2021年
) a1
group by video_id
order by avg_comp_play_rate desc
;
2.平均播放进度大于60%的视频类别
题目:计算各类视频的平均播放进度,将进度大于60%的类别输出。
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
(101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, null),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2021-01-01 7:00:00');
select
tag
,concat(round(avg(if(diff>duration,100,diff/duration*100)),2),'%') play_progress # 平均播放进度
from (
select
b.tag
,uid
,a.video_id
,timestampdiff(second,start_time,end_time) diff
,b.duration
from (
select
uid
,video_id
,start_time
,end_time
from tb_user_video_log
) a left join (
select
video_id
,tag
,duration
from tb_video_info
) b on a.video_id=b.video_id
) a1
group by tag
having avg(if(diff>duration,100,diff/duration*100))>60 # 进度大于60%
order by play_progress desc
;
3.每类视频近一个月的转发量/率
题目:统计在有用户互动的最近一个月(按包含当天在内的近30天算,比如10月31日的近30天为10.2~10.31之间的数据)中,每类视频的转发量和转发率(保留3位小数)。
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
(101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, null),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2021-01-01 7:00:00');
select
tag
,sum(if_retweet) retweet_cut # 转发量
,round(sum(if_retweet)/count(if_retweet),3) retweet_rate # 转发率
from (
select
a.uid
,a.video_id
,date(a.start_time) start_time
,max(date(a.start_time)) over() max_start_time # 最大日期(并不是对于各类视频计算最大值,而是整体的日期最大值)
,b.tag
,a.if_retweet
from (
select
uid
,video_id
,start_time
,if_retweet
from tb_user_video_log
) a left join (
select
video_id
,tag
from tb_video_info
) b on a.video_id=b.video_id
) a1
where datediff(max_start_time,start_time)<=29 # 在有用户互动的最近一个月(按包含当天在内的近30天算,比如10月31日的近30天为10.2~10.31之间的数据)
-- 或者start_time>date_sub(max_start_time,interval 30 day)
group by tag
order by retweet_rate desc
;
4.每个创作者每月的涨粉率及截止当前的总粉丝量
题目:计算2021年里每个创作者每月的涨粉率及截止当月的总粉丝量。
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
(101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, null),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2021-01-01 7:00:00');
select
author
,month
,round((fans_growth-fans_decrease)/total_play,3) fans_growth_rate # 每个创作者每月的涨粉率
,sum((fans_growth-fans_decrease)) over(partition by author order by month) total_fans # 每个创作者截止当月的总粉丝量
from (
select
b.author
,date_format(a.start_time,'%Y-%m') month
,sum(if(a.if_follow=1,1,0)) fans_growth # 加粉量
,sum(if(a.if_follow=2,1,0)) fans_decrease # 掉粉量
,count(a.start_time) total_play # 总播放量
from (
select
video_id
,start_time
,if_follow
from tb_user_video_log
) a left join (
select
video_id
,author
from tb_video_info
) b on a.video_id=b.video_id
where year(a.start_time)='2021' # 2021年
group by b.author,date_format(a.start_time,'%Y-%m')
) a1
order by author,total_fans
;
5.国庆期间每类视频点赞量和转发量
题目:统计2021年国庆头3天每类视频每天的近一周总点赞量和一周内最大单天转发量,结果按视频类别降序、日期升序排序。假设数据库中数据足够多,至少每个类别下国庆头3天及之前一周的每天都有播放记录。
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:20', 1, 1, 0, null)
,(105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 0, 0, 1, null)
,(102, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 1, 1, null)
,(101, 2002, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 1, null)
,(101, 2002, '2021-09-27 11:00:00', '2021-09-27 11:00:30', 1, 1, 0, null)
,(102, 2002, '2021-09-28 11:00:00', '2021-09-28 11:00:30', 1, 0, 1, null)
,(103, 2002, '2021-09-29 11:00:00', '2021-09-29 11:00:30', 1, 0, 1, null)
,(102, 2002, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 0, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 2, 0, 1, null)
,(107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 0, 1, null)
,(108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, null)
,(109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:05', 0, 1, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '旅游', 30, '2020-01-01 7:00:00')
,(2002, 901, '旅游', 60, '2021-01-01 7:00:00')
,(2003, 902, '影视', 90, '2020-01-01 7:00:00')
,(2004, 902, '美女', 90, '2020-01-01 8:00:00');
select
tag
,dt
,sum_like_cnt_7d # 2021年国庆头3天每天的近一周总点赞量
,max_retweet_cnt_7d # 2021年国庆头3天每天的一周内最大单天转发量
from (
select
b.tag
,date(a.start_time) dt
,sum(if_like) like_cnt # 每类视频每天的点赞数
,sum(if_retweet) retweet_cnt # 每类视频每天的转发量
,sum(sum(if_like)) over(partition by b.tag order by date(a.start_time) desc rows between current row and 6 following) sum_like_cnt_7d # 每天的近一周总点赞量
,max(sum(if_retweet)) over(partition by b.tag order by date(a.start_time) desc rows between current row and 6 following) max_retweet_cnt_7d # 一周内最大单天转发量
from (
select
video_id
,start_time
,if_like
,if_retweet
from tb_user_video_log
) a join (
select
video_id
,tag
from tb_video_info
) b on a.video_id=b.video_id
where date(start_time) between '2021-09-25' and '2021-10-03' # 2021年国庆头3天(10.01~10.03)里10.01的近7天(9.25~10.01)
group by b.tag,date(a.start_time)
) a1
where a1.dt between '2021-10-01' and '2021-10-03' # 2021年国庆头3天
order by tag desc,dt asc
;
开窗函数知识点:
rows n perceding:从当前行到前n行(一共n+1行)
rang/rows between 边界规则1 and 边界规则2:rang表示按照值的范围进行定义框架,rows表示按照行的范围进行定义框架
rows between 2 perceding and 2 following #当前行往前2行+当前行+当前行往后2行(一共5行)
rows between 1 following 3 following #当前行的后1——>后3(共3行)
rows between unbounded preceding and current row #从第一行到当前行
6.近一个月发布的视频中热度最高的top3视频
题目:找出近一个月发布的视频中热度最高的top3视频。
--输入:
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:30', 1, 1, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:31', 1, 1, 0, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:35', 0, 0, 1, null)
,(103, 2001, '2021-10-03 11:00:50', '2021-10-03 11:01:35', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:04', 2, 0, 1, null)
,(107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:06', 1, 0, 0, null)
,(108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, null)
,(109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:01', 0, 1, 0, null)
,(105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 0, 1, null)
,(101, 2003, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 0, null)
,(101, 2003, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '旅游', 30, '2021-09-05 7:00:00')
,(2002, 901, '旅游', 60, '2021-09-05 7:00:00')
,(2003, 902, '影视', 90, '2021-09-05 7:00:00')
,(2004, 902, '影视', 90, '2021-09-05 8:00:00');
select
video_id
,round((100*comp_play_rate+5*like_cnt+3*comment_cnt+2*retweet_cnt)*fresh_rate,0) hot_index #-- 热度
from (
select
a.video_id
,sum(if(timestampdiff(second,start_time,end_time)>=duration, 1,0))/count(start_time) comp_play_rate # 视频完播率:完成播放次数占总播放次数的比例
,sum(if(if_like=1,1,0)) like_cnt # 点赞数
,count(comment_id) comment_cnt # 评论数
,sum(if(if_retweet=1,1,0)) retweet_cnt # 转发数
,avg(datediff(max_end_time,max_video_end_time)) no_play_days # 最近无播放天数
,1/(avg(datediff(max_end_time,max_video_end_time))+1) fresh_rate # 新鲜度
from (
select
*
,max(date(end_time)) over(partition by video_id) max_video_end_time # 每个视频最大的结束观看日期
,max(date(end_time)) over() max_end_time # 全部视频最大的结束观看日期
from tb_user_video_log
) a left join (
select
video_id
,duration
,release_time
from tb_video_info
) b on a.video_id=b.video_id
where datediff(max_end_time,date(release_time))<=29 # 近一个月(最近播放日期以end_time-结束观看时间为准,假设为T,则最近一个月按[T-29, T]闭区间统计)
group by a.video_id
) a1
order by hot_index desc
limit 3 # 热度最高的top3视频
;
# 关于where datediff(max_end_time,date(release_time))<=29
# 后面那个不可以用end_time,而是应该用release_time, 因为存在视频发布当天并没有播放记录的情况。
# 举个例子:10.03往前推30天是09.04。我们要获取的统计时间区间是09.04-10.03。假设某个视频09.01发布,但是5天后才有第一次播放记录,
# 那么这个视频最早的end_time是09.06,按照这个end_time,用DATEDIFF法来生成,得到的时间区间是09.06-10.03,明显不完整。个人感觉题目也是没有完全讲清楚。。
二、用户增长场景(某度信息流)
1.2021年11月每天的人均浏览文章时长
题目:统计2021年11月每天的人均浏览文章时长(秒数),结果保留1位小数,并按时长由短到长排序。
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:31', 0),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:24', 0),
(102, 9002, '2021-11-01 11:00:00', '2021-11-01 11:00:11', 0),
(101, 9001, '2021-11-02 10:00:00', '2021-11-02 10:00:50', 0),
(102, 9002, '2021-11-02 11:00:01', '2021-11-02 11:00:24', 0);
select
dt
,round(avg(viiew_len_sec),1) avg_viiew_len_sec # 每天的人均浏览文章时长(秒数)
from (
select
date(in_time) dt
,uid
,sum(timestampdiff(second,in_time,out_time)) viiew_len_sec # 每天每个人浏览文章时长(秒数)
from tb_user_log
where substr(in_time,1,7)='2021-11' # 2021年11月
and artical_id<>'0' # artical_id-文章ID为0表示用户在非文章内容页(比如App内的列表页、活动页等)
group by date(in_time),uid
) a
group by dt
order by avg_viiew_len_sec
;
2.每篇文章同一时刻最大在看人数
题目:统计每篇文章同一时刻最大在看人数,如果同一时刻有进入也有离开时,先记录用户数增加再记录减少,结果按最大人数降序。
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:11', 0),
(102, 9001, '2021-11-01 10:00:09', '2021-11-01 10:00:38', 0),
(103, 9001, '2021-11-01 10:00:28', '2021-11-01 10:00:58', 0),
(104, 9002, '2021-11-01 11:00:45', '2021-11-01 11:01:11', 0),
(105, 9001, '2021-11-01 10:00:51', '2021-11-01 10:00:59', 0),
(106, 9002, '2021-11-01 11:00:55', '2021-11-01 11:01:24', 0),
(107, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0);
select
artical_id
,max(in_time_num) max_uv # 每篇文章同一时刻最大在看人数
from (
select
artical_id
,time
,tag
,sum(tag) over(partition by artical_id order by time asc,tag desc) in_time_num # 每一时刻的在看人数(如果同一时刻有进入也有离开时,先记录用户数增加再记录减少)
from (
select
artical_id
,uid
,in_time time
,1 tag # 进入标为1
from tb_user_log
where artical_id<>0 # artical_id-文章ID为0表示用户在非文章内容页(比如App内的列表页、活动页等)
union all
select
artical_id
,uid
,out_time time
,-1 tag # 离开标为-1
from tb_user_log
where artical_id<>0 # artical_id-文章ID为0表示用户在非文章内容页(比如App内的列表页、活动页等)
) a
) a1
group by artical_id
order by max_uv desc
;
3.2021年11月每天新用户的次日留存率
题目:统计2021年11月每天新用户的次日留存率(保留2位小数)
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
(103, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
(101, 9002, '2021-11-02 10:00:09', '2021-11-02 10:00:28', 0),
(103, 9002, '2021-11-02 10:00:51', '2021-11-02 10:00:59', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(101, 9003, '2021-11-03 11:00:55', '2021-11-03 11:01:24', 0),
(104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(101, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0);
-- 方法1
select
dt
,round(count(dt_next)/count(uid),2) uv_left_rate
from (
select
uid
,dt
,dt_next
from (
select
a.uid
,a.dt
,b.dt dt_next
,min(a.dt) over(partition by a.uid) min_dt # 每个用户最早的进入时间(用来判断新用户)
from (
select
uid
,date(in_time) dt # 进入时间
from tb_user_log
union # 如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过
select
uid
,date(out_time) dt # 离开时间
from tb_user_log
) a left join(
select
uid
,date(in_time) dt
from tb_user_log
union
select
uid
,date(out_time) dt
from tb_user_log
) b on a.uid=b.uid
and date_add(a.dt,interval 1 day)=b.dt # 第二天又活跃
) a1
where dt=min_dt # 新用户
and date_format(a1.dt,'%Y-%m') = '2021-11' # 2021年11月
) a2
group by dt
order by dt
;
-- 方法2
select
a.dt
,round(count(b.dt)/count(a.uid),2) uv_left_rate
from (
select
uid
,min(date(in_time)) dt # 新用户及活跃当天(每个用户第一次活跃的日期)
from tb_user_log
group by uid
) a left join ( # 新用户第二天还活跃的日期
select
uid
,date(in_time) dt
from tb_user_log
union # 如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过
select
uid
,date(out_time) dt
from tb_user_log
) b on a.uid=b.uid
and date_add(a.dt,interval 1 day)=b.dt
where date_format(a.dt,'%Y-%m') = '2021-11' # 2021年11月
group by a.dt
order by a.dt
;
4.统计活跃间隔对用户分级结果
题目:统计活跃间隔对用户分级后,各活跃等级用户占比,结果保留两位小数,且按占比降序排序。
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(109, 9001, '2021-08-31 10:00:00', '2021-08-31 10:00:09', 0),
(109, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0),
(108, 9001, '2021-09-01 10:00:01', '2021-09-01 10:01:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(104, 9003, '2021-09-03 11:00:45', '2021-09-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(102, 9001, '2021-10-30 10:00:00', '2021-10-30 10:00:09', 0),
(103, 9001, '2021-10-21 10:00:00', '2021-10-21 10:00:09', 0),
(101, 0, '2021-10-01 10:00:00', '2021-10-01 10:00:42', 1);
-- 方法1
select
user_grade
,round(count(uid)/max(user_num),2) ratio # 占比
from (
select
uid
,first_dt
,last_dt
,max_dt
,case when datediff(max_dt,last_dt)>=30 then '流失用户' # 流失用户(近30天未活跃但更早前活跃过) (最大日期-最晚活跃日期)>=30天
when datediff(max_dt,last_dt)>=7 and datediff(max_dt,last_dt)<30 then '沉睡用户' # 沉睡用户(近7天未活跃但更早前活跃过) 7<=(最大日期-最晚活跃日期)<30
when datediff(max_dt,last_dt)<7 and datediff(max_dt,first_dt)>7 then '忠实用户' # 忠实用户(近7天活跃过且非新晋用户) (最大日期-最晚活跃日期)<7 且 (最大日期-最早活跃日期)>7
when datediff(max_dt,last_dt)<7 and datediff(max_dt,first_dt)<7 then '新晋用户' # 新晋用户(近7天新增) (最大日期-最晚活跃日期)<7 且 (最大日期-最早活跃日期)<7
else null end user_grade
,user_num
from (
select
uid
,min(date(in_time)) first_dt # 每个用户最早活跃日期
,max(date(out_time)) last_dt # 每个用户最晚活跃日期
,max(max(date(out_time))) over() max_dt # 数据中所有日期的最大值,也就是今天
,count(uid) over() user_num # 全部用户数
from tb_user_log
group by uid
) a
) a1
group by user_grade
order by ratio desc
;
-- 方法2
select
user_grade
,round(count(uid)/max(user_num),2) ratio # 占比
from (
select
uid
,first_dt
,last_dt
,max_dt
,case when datediff(max_dt,last_dt)>=30 then '流失用户' # 流失用户(近30天未活跃但更早前活跃过) (最大日期-最晚活跃日期)>=30天
when datediff(max_dt,last_dt)>=7 then '沉睡用户' # 沉睡用户(近7天未活跃但更早前活跃过) (最大日期-最晚活跃日期)>=7 【隐藏条件:(最大日期-最晚活跃日期)<30】
when datediff(max_dt,first_dt)<7 then '新晋用户' # 新晋用户(近7天新增) (最大日期-最早活跃日期)<7 【隐藏条件:(最大日期-最晚活跃日期)<7】
else '忠实用户' end user_grade # 忠实用户(近7天活跃过且非新晋用户) 【剩余条件:(最大日期-最晚活跃日期)<7 且 (最大日期-最早活跃日期)>7】
,user_num
from (
select
uid
,min(date(in_time)) first_dt # 每个用户最早活跃日期
,max(date(out_time)) last_dt # 每个用户最晚活跃日期
,max(max(date(out_time))) over() max_dt # 数据中所有日期的最大值,也就是今天
,count(uid) over() user_num # 全部用户数
from tb_user_log
group by uid
) a
) a1
group by user_grade
order by ratio desc
;
5.每天的日活数及新用户占比
题目:统计每天的日活数及新用户占比
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(102, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
(108, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
(108, 9001, '2021-11-02 10:00:01', '2021-11-02 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(106, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(109, 9002, '2021-11-03 11:00:55', '2021-11-03 11:00:59', 0),
(104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(106, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0);
select
dt
,count(distinct uid) dau # 当天活跃用户数(日活数)
,round(count(distinct if(dt=min_dt,uid,null))/count(distinct uid),2) # 新用户占比=当天的新用户数÷当天活跃用户数(日活数)
from (
select
uid
,dt
,min(dt) over(partition by uid) min_dt # 每个用户第一次活跃的日期
from (
select
uid
,date(in_time) dt # 进入时间
from tb_user_log
union # 如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过
select
uid
,date(out_time) dt # 离开时间
from tb_user_log
) a
) a1
group by dt
order by dt
;
6.连续签到领金币
题目:计算每个用户2021年7月以来每月获得的金币数(该活动到10月底结束,11月1日开始的签到不再获得金币)。结果按月份、ID升序排序。
--输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 0, '2021-07-07 10:00:00', '2021-07-07 10:00:09', 1),
(101, 0, '2021-07-08 10:00:00', '2021-07-08 10:00:09', 1),
(101, 0, '2021-07-09 10:00:00', '2021-07-09 10:00:42', 1),
(101, 0, '2021-07-10 10:00:00', '2021-07-10 10:00:09', 1),
(101, 0, '2021-07-11 23:59:55', '2021-07-11 23:59:59', 1),
(101, 0, '2021-07-12 10:00:28', '2021-07-12 10:00:50', 1),
(101, 0, '2021-07-13 10:00:28', '2021-07-13 10:00:50', 1),
(102, 0, '2021-10-01 10:00:28', '2021-10-01 10:00:50', 1),
(102, 0, '2021-10-02 10:00:01', '2021-10-02 10:01:50', 1),
(102, 0, '2021-10-03 11:00:55', '2021-10-03 11:00:59', 1),
(102, 0, '2021-10-04 11:00:45', '2021-10-04 11:00:55', 0),
(102, 0, '2021-10-05 11:00:53', '2021-10-05 11:00:59', 1),
(102, 0, '2021-10-06 11:00:45', '2021-10-06 11:00:55', 1);
select
uid
,month
,sum(coin) coin # 每个用户每月获得的金币数
from(
select
uid
,month
,dt
,rn
,dt_tmp
,case when rk%7=3 then 3
when rk%7=0 then 7
else 1 end coin # 每个用户每天得到的金币数
from (
select
uid
,month
,dt
,rn
,date_sub(dt,interval rn day) dt_tmp # 签到日期-排序编号=差值(连续问题核心就是利用排序编号与签到日期的差值是相等的。因为如果是连续的话,编号也是自增1,日期也是自增1。)
,dense_rank() over(partition by date_sub(dt,interval rn day),uid order by dt) rk # 连续签到的天数
from (
select distinct # 防止一天有多次签到活动,distinct去重
uid
,date_format(in_time,'%Y%m') month
,date(in_time) dt # 签到日期
,dense_rank() over(partition by uid order by date(in_time)) rn # 签到日期的排序编号(用dense_rank方便去重)
from tb_user_log
where date(in_time) between '2021-07-07' and '2021-10-31' # 从2021年7月7日0点开始到10月底结束
and artical_id=0 and sign_in=1 # 只有artical_id为0时sign_in值才有效
) a
) a1
) a2
group by uid,month
order by month,uid
;
三、电商场景(某东商城)
1.计算商城中2021年每月的GMV
题目:请计算商城中2021年每月的GMV,输出GMV大于10w的每月GMV,值保留到整数。
--输入:
DROP TABLE IF EXISTS tb_order_overall;
CREATE TABLE tb_order_overall (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
event_time datetime COMMENT '下单时间',
total_amount DECIMAL NOT NULL COMMENT '订单总金额',
total_cnt INT NOT NULL COMMENT '订单商品总件数',
`status` TINYINT NOT NULL COMMENT '订单状态'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_order_overall(order_id, uid, event_time, total_amount, total_cnt, `status`) VALUES
(301001, 101, '2021-10-01 10:00:00', 15900, 2, 1),
(301002, 101, '2021-10-01 11:00:00', 15900, 2, 1),
(301003, 102, '2021-10-02 10:00:00', 34500, 8, 0),
(301004, 103, '2021-10-12 10:00:00', 43500, 9, 1),
(301005, 105, '2021-11-01 10:00:00', 31900, 7, 1),
(301006, 102, '2021-11-02 10:00:00', 24500, 6, 1),
(391007, 102, '2021-11-03 10:00:00', -24500, 6, 2),
(301008, 104, '2021-11-04 10:00:00', 55500, 12, 0);
select
date_format(event_time,'%Y-%m') month
,round(sum(if(status in ('0','1'),total_amount,0)),0) GMV # 2021年每月的GMV
from tb_order_overall
where year(event_time)='2021' # 2021年
group by date_format(event_time,'%Y-%m')
having sum(if(status in ('0','1'),total_amount,0))>100000 # GMV大于10w
order by GMV
;
2.统计2021年10月每个退货率不大于0.5的商品各项指标
题目:请统计2021年10月每个有展示记录的退货率不大于0.5的商品各项指标。
--输入:
DROP TABLE IF EXISTS tb_user_event;
CREATE TABLE tb_user_event (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
product_id INT NOT NULL COMMENT '商品ID',
event_time datetime COMMENT '行为时间',
if_click TINYINT COMMENT '是否点击',
if_cart TINYINT COMMENT '是否加购物车',
if_payment TINYINT COMMENT '是否付款',
if_refund TINYINT COMMENT '是否退货退款'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_event(uid, product_id, event_time, if_click, if_cart, if_payment, if_refund) VALUES
(101, 8001, '2021-10-01 10:00:00', 0, 0, 0, 0),
(102, 8001, '2021-10-01 10:00:00', 1, 0, 0, 0),
(103, 8001, '2021-10-01 10:00:00', 1, 1, 0, 0),
(104, 8001, '2021-10-02 10:00:00', 1, 1, 1, 0),
(105, 8001, '2021-10-02 10:00:00', 1, 1, 1, 0),
(101, 8002, '2021-10-03 10:00:00', 1, 1, 1, 0),
(109, 8001, '2021-10-04 10:00:00', 1, 1, 1, 1);
select
product_id
,round(if(play_num=0,0,click_num/play_num),3) ctr # 商品点展比=点击数÷展示数(当分母为0时整体结果记为0)
,round(if(click_num=0,0,cart_num/click_num),3) cart_rate # 加购率=加购数÷点击数(当分母为0时整体结果记为0)
,round(if(cart_num=0,0,payment_num/cart_num),3) payment_rate # 成单率=付款数÷加购数(当分母为0时整体结果记为0)
,round(if(payment_num=0,0,refund_num/payment_num),3) refund_rate # 退货率=退款数÷付款数(当分母为0时整体结果记为0)
from (
select
product_id
,count(event_time) play_num # 展示数
,sum(if_click) click_num # 点击数
,sum(if_cart) cart_num # 加购数
,sum(if_payment) payment_num # 付款数
,sum(if_refund) refund_num # 退款数
from tb_user_event
where date_format(event_time,'%Y-%m')='2021-10' # 2021年10月
and event_time is not null # 每个有展示记录的商品
group by product_id
) a
where round(ifnull(refund_num/payment_num,0),3)<=0.5 # 退货率不大于0.5
order by product_id
;
3.某店铺的各商品毛利率及店铺整体毛利率
题目:请计算2021年10月以来店铺901中商品毛利率大于24.9%的商品信息及店铺整体毛利率。
--输入:
DROP TABLE IF EXISTS tb_order_overall;
CREATE TABLE tb_order_overall (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
event_time datetime COMMENT '下单时间',
total_amount DECIMAL NOT NULL COMMENT '订单总金额',
total_cnt INT NOT NULL COMMENT '订单商品总件数',
`status` TINYINT NOT NULL COMMENT '订单状态'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_order_overall(order_id, uid, event_time, total_amount, total_cnt, `status`) VALUES
(301001, 101, '2021-10-01 10:00:00', 30000, 3, 1),
(301002, 102, '2021-10-01 11:00:00', 23900, 2, 1),
(301003, 103, '2021-10-02 10:00:00', 31000, 2, 1);
DROP TABLE IF EXISTS tb_product_info;
CREATE TABLE tb_product_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
product_id INT NOT NULL COMMENT '商品ID',
shop_id INT NOT NULL COMMENT '店铺ID',
tag VARCHAR(12) COMMENT '商品类别标签',
in_price DECIMAL NOT NULL COMMENT '进货价格',
quantity INT NOT NULL COMMENT '进货数量',
release_time datetime COMMENT '上架时间'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_order_detail;
CREATE TABLE tb_order_detail (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
product_id INT NOT NULL COMMENT '商品ID',
price DECIMAL NOT NULL COMMENT '商品单价',
cnt INT NOT NULL COMMENT '下单数量'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_product_info(product_id, shop_id, tag, in_price, quantity, release_time) VALUES
(8001, 901, '家电', 6000, 100, '2020-01-01 10:00:00'),
(8002, 902, '家电', 12000, 50, '2020-01-01 10:00:00'),
(8003, 901, '3C数码', 12000, 50, '2020-01-01 10:00:00');
INSERT INTO tb_order_detail(order_id, product_id, price, cnt) VALUES
(301001, 8001, 8500, 2),
(301001, 8002, 15000, 1),
(301002, 8001, 8500, 1),
(301002, 8002, 16000, 1),
(301003, 8002, 14000, 1),
(301003, 8003, 18000, 1);
select
ifnull(product_id,'店铺汇总') product_id
,profit_rate
from (
select
product_id
,concat(round((1-(sum(in_price*cnt)/sum(price*cnt)))*100,1),'%') profit_rate # 商品毛利率=(1-进价/平均单件售价)*100%
from (
select
a.shop_id
,a.product_id
,a.in_price # 进货价格
,b.order_id
,b.price # 商品单价
,b.cnt # 下单数量
from (
select
product_id
,shop_id
,in_price # 进货价格
from tb_product_info
where shop_id='901' # 店铺901
) a join (
select
order_id
,product_id
,price # 商品单价
,cnt # 下单数量
from tb_order_detail
) b on a.product_id=b.product_id
join (
select
order_id
,event_time
from tb_order_overall
where substr(event_time,1,7)>='2021-10' # 2021年10月以来
) c on b.order_id=c.order_id
) a1
group by product_id
with rollup
having (1-(sum(in_price*cnt)/sum(price*cnt)))*100>24.9 or product_id is null # 商品毛利率大于24.9%
order by product_id
) a2
;
1. 为什么要加or product_id is null?
因为先with rollup再having,所以“店铺汇总”数据也参与了筛选,所以必须加上“店铺汇总”,而此时“店铺汇总”的命名还是null
2. 为什么order by product_id要加在having之后(即在a1层),而不能加在最外层a2层?
因为在a1层,“店铺汇总”的名称还是null,在Mysql中,字符串的排序是根据字符串的首字母的ASCII码进行排序的,首字母相同的,则会根据第二个字母排序,以此类推。
在ASCII码中:
NULL的顺序排在第一位,对应十进制的0,因此只要是升序排序,NULL字段永远排在第一位;
数字0对应的ASCII的十进制是48;
大写字母A对应的 ASCII 的十进制是65,小写字母a对应的 ASCII 的十进制是97。
汉字排序用order by字段是不行的,因为mysql编码一般是UTF8的,而要相对汉字进行排序必须用GBK编码,可以再查询的时候通过sql将字段进行GBK编码,如:order by convert(name using 'gbk'),则汉字就可按首拼音排序
4.零食类商品中复购率top3高的商品
题目:请统计零食类商品中复购率top3高的商品。
--输入:
DROP TABLE IF EXISTS tb_order_overall;
CREATE TABLE tb_order_overall (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
event_time datetime COMMENT '下单时间',
total_amount DECIMAL NOT NULL COMMENT '订单总金额',
total_cnt INT NOT NULL COMMENT '订单商品总件数',
`status` TINYINT NOT NULL COMMENT '订单状态'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_product_info;
CREATE TABLE tb_product_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
product_id INT NOT NULL COMMENT '商品ID',
shop_id INT NOT NULL COMMENT '店铺ID',
tag VARCHAR(12) COMMENT '商品类别标签',
in_price DECIMAL NOT NULL COMMENT '进货价格',
quantity INT NOT NULL COMMENT '进货数量',
release_time datetime COMMENT '上架时间'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_order_detail;
CREATE TABLE tb_order_detail (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
product_id INT NOT NULL COMMENT '商品ID',
price DECIMAL NOT NULL COMMENT '商品单价',
cnt INT NOT NULL COMMENT '下单数量'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_product_info(product_id, shop_id, tag, in_price, quantity, release_time) VALUES
(8001, 901, '零食', 60, 1000, '2020-01-01 10:00:00'),
(8002, 901, '零食', 140, 500, '2020-01-01 10:00:00'),
(8003, 901, '零食', 160, 500, '2020-01-01 10:00:00');
INSERT INTO tb_order_overall(order_id, uid, event_time, total_amount, total_cnt, `status`) VALUES
(301001, 101, '2021-09-30 10:00:00', 140, 1, 1),
(301002, 102, '2021-10-01 11:00:00', 235, 2, 1),
(301011, 102, '2021-10-31 11:00:00', 250, 2, 1),
(301003, 101, '2021-11-02 10:00:00', 300, 2, 1),
(301013, 105, '2021-11-02 10:00:00', 300, 2, 1),
(301005, 104, '2021-11-03 10:00:00', 170, 1, 1);
INSERT INTO tb_order_detail(order_id, product_id, price, cnt) VALUES
(301001, 8002, 150, 1),
(301011, 8003, 200, 1),
(301011, 8001, 80, 1),
(301002, 8001, 85, 1),
(301002, 8003, 180, 1),
(301003, 8002, 140, 1),
(301003, 8003, 180, 1),
(301013, 8002, 140, 2),
(301005, 8003, 180, 1);
-- 方法1
select
product_id
,round(count(if(num>=2,uid,null))/max(user_num),3) repurchase_rate # 某商品复购率 = 近90天内购买它至少两次的人数 ÷ 购买它的总人数
from (
select
product_id
,uid
,count(dt) num # 每个商品被每个人购买的次数
,count(uid) over(partition by product_id) user_num # 每个商品购买它的总人数
from (
select
a.product_id
,b.order_id
,c.uid
,c.dt
from (
select
product_id
from tb_product_info
where tag='零食' # 零食类商品
) a join (
select
order_id
,product_id
from tb_order_detail
) b on a.product_id=b.product_id
join (
select
order_id
,uid
,date(event_time) dt
,date(max(event_time) over()) max_dt # 最大日期(当天)
from tb_order_overall
) c on b.order_id=c.order_id
where dt>=date_sub(max_dt,interval 89 day) # 近90天指包含最大日期(记为当天)在内的近90天
) a1
group by product_id,uid
) a2
group by product_id
order by repurchase_rate desc,product_id asc
limit 3 # 复购率top3高
;
-- 方法2
select
product_id
,round(sum(repurchase) / count(repurchase), 3) as repurchase_rate # 某商品复购率 = 近90天内购买它至少两次的人数 ÷ 购买它的总人数
from (
select
product_id
,uid
,if(count(dt)>1, 1, 0) as repurchase # 计算是否复购,如果购买次数至少两次,则标记为1,也就是复购,负责标记为0
from (
select
a.product_id
,b.order_id
,c.uid
,c.dt
from (
select
product_id
from tb_product_info
where tag='零食' # 零食类商品
) a join (
select
order_id
,product_id
from tb_order_detail
) b on a.product_id=b.product_id
join (
select
order_id
,uid
,date(event_time) dt
,date(max(event_time) over()) max_dt # 最大日期(当天)
from tb_order_overall
) c on b.order_id=c.order_id
where dt>=date_sub(max_dt,interval 89 day) # 近90天指包含最大日期(记为当天)在内的近90天
) a1
group by product_id,uid
) a2
group by product_id
order by repurchase_rate desc,product_id asc
limit 3;
5.10月的新户客单价和获客成本
题目:请计算2021年10月商城里所有新用户的首单平均交易金额(客单价)和平均获客成本(保留一位小数)。
--输入:
DROP TABLE IF EXISTS tb_order_overall;
CREATE TABLE tb_order_overall (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
event_time datetime COMMENT '下单时间',
total_amount DECIMAL NOT NULL COMMENT '订单总金额',
total_cnt INT NOT NULL COMMENT '订单商品总件数',
`status` TINYINT NOT NULL COMMENT '订单状态'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_product_info;
CREATE TABLE tb_product_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
product_id INT NOT NULL COMMENT '商品ID',
shop_id INT NOT NULL COMMENT '店铺ID',
tag VARCHAR(12) COMMENT '商品类别标签',
in_price DECIMAL NOT NULL COMMENT '进货价格',
quantity INT NOT NULL COMMENT '进货数量',
release_time datetime COMMENT '上架时间'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_order_detail;
CREATE TABLE tb_order_detail (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
product_id INT NOT NULL COMMENT '商品ID',
price DECIMAL NOT NULL COMMENT '商品单价',
cnt INT NOT NULL COMMENT '下单数量'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_product_info(product_id, shop_id, tag, in_price, quantity, release_time) VALUES
(8001, 901, '日用', 60, 1000, '2020-01-01 10:00:00'),
(8002, 901, '零食', 140, 500, '2020-01-01 10:00:00'),
(8003, 901, '零食', 160, 500, '2020-01-01 10:00:00'),
(8004, 902, '零食', 130, 500, '2020-01-01 10:00:00');
INSERT INTO tb_order_overall(order_id, uid, event_time, total_amount, total_cnt, `status`) VALUES
(301002, 102, '2021-10-01 11:00:00', 235, 2, 1),
(301003, 101, '2021-10-02 10:00:00', 300, 2, 1),
(301005, 104, '2021-10-03 10:00:00', 160, 1, 1);
INSERT INTO tb_order_detail(order_id, product_id, price, cnt) VALUES
(301002, 8001, 85, 1),
(301002, 8003, 180, 1),
(301003, 8004, 140, 1),
(301003, 8003, 180, 1),
(301005, 8003, 180, 1);
select
round(avg(total_amount), 1) as avg_amount,
round(avg(raw_amount-total_amount), 1) as avg_cost
from (
select
uid,
total_amount,
raw_amount
from(
select
distinct
uid,
first_value(event_time) over(wd_uid_first) as event_time,
first_value(order_id) over(wd_uid_first) as order_id,
first_value(total_amount) over(wd_uid_first) as total_amount
from tb_order_overall
window wd_uid_first as (partition by uid order by event_time)
)a join (
select
order_id,
sum(price * cnt) as raw_amount
from tb_order_detail
group by order_id
)b on a.order_id=b.order_id
where date_format(event_time, '%Y-%m') = '2021-10'
)c
6.店铺901国庆期间的7日动销率和滞销率
题目:请计算店铺901在2021年国庆头3天的7日动销率和滞销率,结果保留3位小数,按日期升序排序。
--输入:
DROP TABLE IF EXISTS tb_order_overall;
CREATE TABLE tb_order_overall (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
event_time datetime COMMENT '下单时间',
total_amount DECIMAL NOT NULL COMMENT '订单总金额',
total_cnt INT NOT NULL COMMENT '订单商品总件数',
`status` TINYINT NOT NULL COMMENT '订单状态'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_product_info;
CREATE TABLE tb_product_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
product_id INT NOT NULL COMMENT '商品ID',
shop_id INT NOT NULL COMMENT '店铺ID',
tag VARCHAR(12) COMMENT '商品类别标签',
in_price DECIMAL NOT NULL COMMENT '进货价格',
quantity INT NOT NULL COMMENT '进货数量',
release_time datetime COMMENT '上架时间'
) CHARACTER SET utf8 COLLATE utf8_bin;
DROP TABLE IF EXISTS tb_order_detail;
CREATE TABLE tb_order_detail (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
product_id INT NOT NULL COMMENT '商品ID',
price DECIMAL NOT NULL COMMENT '商品单价',
cnt INT NOT NULL COMMENT '下单数量'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_product_info(product_id, shop_id, tag, in_price, quantity, release_time) VALUES
(8001, 901, '日用', 60, 1000, '2020-01-01 10:00:00'),
(8002, 901, '零食', 140, 500, '2020-01-01 10:00:00'),
(8003, 901, '零食', 160, 500, '2020-01-01 10:00:00');
INSERT INTO tb_order_overall(order_id, uid, event_time, total_amount, total_cnt, `status`) VALUES
(301004, 102, '2021-09-30 10:00:00', 170, 1, 1),
(301005, 104, '2021-10-01 10:00:00', 160, 1, 1),
(301003, 101, '2021-10-02 10:00:00', 300, 2, 1),
(301002, 102, '2021-10-03 11:00:00', 235, 2, 1);
INSERT INTO tb_order_detail(order_id, product_id, price, cnt) VALUES
(301004, 8002, 180, 1),
(301005, 8002, 170, 1),
(301002, 8001, 85, 1),
(301002, 8003, 180, 1),
(301003, 8002, 150, 1),
(301003, 8003, 180, 1);
with t1 as (
# 每天销售的商品详情表
select
date(b.event_time) ddt
,a.product_id
,c.cnt
,a.release_time
,b.event_time
from (
select
product_id
,shop_id
,release_time
from tb_product_info
where shop_id='901' # 店铺901
) a join (
select
order_id
,product_id
,cnt
from tb_order_detail
) c on a.product_id=c.product_id
join (
select
order_id
,event_time
from tb_order_overall
where date(event_time) between '2021-09-25' and '2021-10-03' # 国庆头3天的7日
) b on c.order_id=b.order_id
)
,
t2 as (
# 国庆前3天每天的在售商品数
select
dt
,count(distinct product_id) on_sale_num
from (
select
product_id
,date(event_time) dt
from tb_product_info,tb_order_overall
where release_time<event_time # 在架商品=上架时间<下单时间
and product_id in (
select
product_id
from tb_product_info
where shop_id='901' # 店铺901
)
and date(event_time) between '2021-10-01' and '2021-10-03' # 国庆头3天
) a
group by dt
)
select
dt
,round(product_num/on_sale_num,3) sale_rate # 动销率=有销量的商品/已上架总商品数
,round(1-product_num/on_sale_num,3) unsale_rate # 滞销率=没有销量的商品/已上架总商品数
from (
select
dt # 国庆前3天
,max(on_sale_num) on_sale_num # 每天的在售商品数
,count(distinct product_id) product_num # 截止当天有销量的商品数
from (
select
t2.dt
,t2.on_sale_num
,t1.ddt
,t1.product_id
from t2
left join t1
on datediff(t2.dt,t1.ddt) between 0 and 6 # datediff('2021-10-01','2021-09-25')=6,10月1日的近7日第一天是9月25日
order by t2.dt,t1.ddt
) a
group by dt
) a1
order by dt
;
四、出行场景(某滴打车)
1.2021年国庆在北京接单3次及以上的司机统计信息
题目:请统计2021年国庆7天期间在北京市接单至少3次的司机的平均接单数和平均兼职收入(暂不考虑平台佣金,直接计算完成的订单费用总额),结果保留3位小数。
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage DOUBLE COMMENT '行驶里程数',
fare DOUBLE COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(101, '北京', '2021-10-01 07:00:00', '2021-10-01 07:02:00', null),
(102, '北京', '2021-10-01 09:00:30', '2021-10-01 09:01:00', 9001),
(101, '北京', '2021-10-01 08:28:10', '2021-10-01 08:30:00', 9002),
(103, '北京', '2021-10-02 07:59:00', '2021-10-02 08:01:00', 9003),
(104, '北京', '2021-10-03 07:59:20', '2021-10-03 08:01:00', 9004),
(105, '北京', '2021-10-01 08:00:00', '2021-10-01 08:02:10', 9005),
(106, '北京', '2021-10-01 17:58:00', '2021-10-01 18:01:00', 9006),
(107, '北京', '2021-10-02 11:00:00', '2021-10-02 11:01:00', 9007),
(108, '北京', '2021-10-02 21:00:00', '2021-10-02 21:01:00', 9008) ;
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9002, 101, 201, '2021-10-01 08:30:00', null, '2021-10-01 08:31:00', null, null, null),
(9001, 102, 202, '2021-10-01 09:01:00', '2021-10-01 09:06:00', '2021-10-01 09:31:00', 10.0, 41.5, 5),
(9003, 103, 202, '2021-10-02 08:01:00', '2021-10-02 08:15:00', '2021-10-02 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-10-03 08:01:00', '2021-10-03 08:13:00', '2021-10-03 08:31:00', 7.5, 22, 4),
(9005, 105, 203, '2021-10-01 08:02:10', '2021-10-01 08:18:00', '2021-10-01 08:31:00', 15.0, 44, 5),
(9006, 106, 203, '2021-10-01 18:01:00', '2021-10-01 18:09:00', '2021-10-01 18:31:00', 8.0, 25, 5),
(9007, 107, 203, '2021-10-02 11:01:00', '2021-10-02 11:07:00', '2021-10-02 11:31:00', 9.9, 30, 5),
(9008, 108, 203, '2021-10-02 21:01:00', '2021-10-02 21:10:00', '2021-10-02 21:31:00', 13.2, 38, 4);
select
city
,round(avg(order_num),3) avg_order_num # 平均接单数
,round(avg(income),3) avg_income # 平均兼职收入
from (
select
a.city
,b.driver_id
,count(b.order_id) order_num # 接单数
,sum(b.fare) income # 兼职收入
from (
select
city
,order_id
from tb_get_car_record
where city='北京' # 北京市
) a join (
select
order_id
,driver_id
,finish_time
,fare
from tb_get_car_order
where date(finish_time) between '2021-10-01' and '2021-10-07' # 在2021年国庆期间
) b on a.order_id=b.order_id
group by a.city,b.driver_id
having count(b.order_id) >=3 # 接单至少3次
) a1
group by city
;
2.有取消订单记录的司机平均评分
题目:请找到2021年10月有过取消订单记录的司机,计算他们每人全部已完成的有评分订单的平均评分及总体平均评分,保留1位小数。先按driver_id升序输出,再输出总体情况。
--输入:
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage FLOAT COMMENT '行驶里程数',
fare FLOAT COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(101, '北京', '2021-10-01 07:00:00', '2021-10-01 07:02:00', null),
(102, '北京', '2021-10-01 09:00:30', '2021-10-01 09:01:00', 9001),
(101, '北京', '2021-10-01 08:28:10', '2021-10-01 08:30:00', 9002),
(103, '北京', '2021-10-02 07:59:00', '2021-10-02 08:01:00', 9003),
(104, '北京', '2021-10-03 07:59:20', '2021-10-03 08:01:00', 9004),
(105, '北京', '2021-10-01 08:00:00', '2021-10-01 08:02:10', 9005),
(106, '北京', '2021-10-01 17:58:00', '2021-10-01 18:01:00', 9006),
(107, '北京', '2021-10-02 11:00:00', '2021-10-02 11:01:00', 9007),
(108, '北京', '2021-10-02 21:00:00', '2021-10-02 21:01:00', 9008),
(109, '北京', '2021-10-08 18:00:00', '2021-10-08 18:01:00', 9009);
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9002, 101, 202, '2021-10-01 08:30:00', null, '2021-10-01 08:31:00', null, null, null),
(9001, 102, 202, '2021-10-01 09:01:00', '2021-10-01 09:06:00', '2021-10-01 09:31:00', 10.0, 41.5, 5),
(9003, 103, 202, '2021-10-02 08:01:00', '2021-10-02 08:15:00', '2021-10-02 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-10-03 08:01:00', '2021-10-03 08:13:00', '2021-10-03 08:31:00', 7.5, 22, 4),
(9005, 105, 203, '2021-10-01 08:02:10', null, '2021-10-01 08:31:00', null, null, null),
(9006, 106, 203, '2021-10-01 18:01:00', '2021-10-01 18:09:00', '2021-10-01 18:31:00', 8.0, 25.5, 5),
(9007, 107, 203, '2021-10-02 11:01:00', '2021-10-02 11:07:00', '2021-10-02 11:31:00', 9.9, 30, 5),
(9008, 108, 203, '2021-10-02 21:01:00', '2021-10-02 21:10:00', '2021-10-02 21:31:00', 13.2, 38, 4),
(9009, 109, 203, '2021-10-08 18:01:00', '2021-10-08 18:11:50', '2021-10-08 18:51:00', 13, 40, 5);
field(str,str1,str2,str3,...)
其中,str是要查找的字符串,str1、str2、str3是用于比较的字符串。field 函数会返回str在后面的字符串中的位置,如果找不到匹配的字符串,则返回 0
select
ifnull(driver_id,'总体') driver_id # 司机ID
,avg_grade # 平均评分和总体平均评分
from (
select
driver_id
,round(avg(grade),1) avg_grade # 平均评分
from tb_get_car_order
where grade is not null # 全部已完成的有评分订单
and driver_id in (
select
driver_id
from tb_get_car_order
where substr(finish_time,1,7)='2021-10' # 2021年10月
and start_time is null # 有过取消订单记录(若乘客上车前,乘客或司机点击取消订单,会将打车订单表对应订单的finish_time-订单完成时间填充为取消时间,其余字段设为null。)
group by driver_id
)
group by driver_id
with rollup
) a
order by field(driver_id,'总体'),driver_id # 即返回driver_id在“总体”中的位置,为0
; # 也可以写成order by case when driver_id='总体' then 99999999 else driver_id end
3.每个城市中评分最高的司机信息
题目:请统计每个城市中评分最高的司机平均评分、日均接单量和日均行驶里程数。
--输入:
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage FLOAT COMMENT '行驶里程数',
fare FLOAT COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(101, '北京', '2021-10-01 07:00:00', '2021-10-01 07:02:00', null),
(102, '北京', '2021-10-01 09:00:30', '2021-10-01 09:01:00', 9001),
(101, '北京', '2021-10-01 08:28:10', '2021-10-01 08:30:00', 9002),
(103, '北京', '2021-10-02 07:59:00', '2021-10-02 08:01:00', 9003),
(104, '北京', '2021-10-03 07:59:20', '2021-10-03 08:01:00', 9004),
(105, '北京', '2021-10-01 08:00:00', '2021-10-01 08:02:10', 9005),
(106, '北京', '2021-10-01 17:58:00', '2021-10-01 18:01:00', 9006),
(107, '北京', '2021-10-02 11:00:00', '2021-10-02 11:01:00', 9007),
(108, '北京', '2021-10-02 21:00:00', '2021-10-02 21:01:00', 9008),
(109, '北京', '2021-10-08 18:00:00', '2021-10-08 18:01:00', 9009);
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9002, 101, 202, '2021-10-01 08:30:00', null, '2021-10-01 08:31:00', null, null, null),
(9001, 102, 202, '2021-10-01 09:01:00', '2021-10-01 09:06:00', '2021-10-01 09:31:00', 10.0, 41.5, 5),
(9003, 103, 202, '2021-10-02 08:01:00', '2021-10-02 08:15:00', '2021-10-02 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-10-03 08:01:00', '2021-10-03 08:13:00', '2021-10-03 08:31:00', 7.5, 22, 4),
(9005, 105, 203, '2021-10-01 08:02:10', null, '2021-10-01 08:31:00', null, null, null),
(9006, 106, 203, '2021-10-01 18:01:00', '2021-10-01 18:09:00', '2021-10-01 18:31:00', 8.0, 25.5, 5),
(9007, 107, 203, '2021-10-02 11:01:00', '2021-10-02 11:07:00', '2021-10-02 11:31:00', 9.9, 30, 5),
(9008, 108, 203, '2021-10-02 21:01:00', '2021-10-02 21:10:00', '2021-10-02 21:31:00', 13.2, 38, 4),
(9009, 109, 203, '2021-10-08 18:01:00', '2021-10-08 18:11:50', '2021-10-08 18:51:00', 13, 40, 5);
select
city
,driver_id
,round(avg_grade,1) avg_grade # 平均评分
,round(avg_order_num,1) avg_order_num # 日均接单量
,round(avg_mileage,3) avg_mileage # 日均行驶里程数
from (
# 当有司机接单时,在打车订单表生成一条订单,填充order_time-接单时间及其左边的字段
select
a.city
,b.driver_id
,avg(b.grade) avg_grade # 平均评分
,count(b.order_id)/count(distinct order_time) avg_order_num # 日均接单量
,sum(b.mileage)/count(distinct order_time) avg_mileage # 日均行驶里程数
,rank() over(partition by a.city order by avg(grade) desc) rk # 每个城市中评分最高的司机(有多个司机评分并列最高时,都输出)
from (
select
city
,order_id
from tb_get_car_record
) a join (
select
order_id
,driver_id
,grade
,mileage
,date(order_time) order_time
from tb_get_car_order
) b on a.order_id=b.order_id
group by a.city,b.driver_id
) a1
where rk=1
order by avg_order_num
;
4.国庆期间近7日日均取消订单量
题目:请统计国庆头3天里,每天的近7日日均订单完成量和日均订单取消量,按日期升序排序。结果保留2位小数。
--输入:
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage FLOAT COMMENT '行驶里程数',
fare FLOAT COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(101, '北京', '2021-09-25 08:28:10', '2021-09-25 08:30:00', 9011),
(102, '北京', '2021-09-25 09:00:30', '2021-09-25 09:01:00', 9012),
(103, '北京', '2021-09-26 07:59:00', '2021-09-26 08:01:00', 9013),
(104, '北京', '2021-09-26 07:59:00', '2021-09-26 08:01:00', 9023),
(104, '北京', '2021-09-27 07:59:20', '2021-09-27 08:01:00', 9014),
(105, '北京', '2021-09-28 08:00:00', '2021-09-28 08:02:10', 9015),
(106, '北京', '2021-09-29 17:58:00', '2021-09-29 18:01:00', 9016),
(107, '北京', '2021-09-30 11:00:00', '2021-09-30 11:01:00', 9017),
(108, '北京', '2021-09-30 21:00:00', '2021-09-30 21:01:00', 9018),
(102, '北京', '2021-10-01 09:00:30', '2021-10-01 09:01:00', 9002),
(106, '北京', '2021-10-01 17:58:00', '2021-10-01 18:01:00', 9006),
(101, '北京', '2021-10-02 08:28:10', '2021-10-02 08:30:00', 9001),
(107, '北京', '2021-10-02 11:00:00', '2021-10-02 11:01:00', 9007),
(108, '北京', '2021-10-02 21:00:00', '2021-10-02 21:01:00', 9008),
(103, '北京', '2021-10-02 07:59:00', '2021-10-02 08:01:00', 9003),
(104, '北京', '2021-10-03 07:59:20', '2021-10-03 08:01:00', 9004),
(109, '北京', '2021-10-03 18:00:00', '2021-10-03 18:01:00', 9009);
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9011, 101, 211, '2021-09-25 08:30:00', '2021-09-25 08:31:00', '2021-09-25 08:54:00', 10, 35, 5),
(9012, 102, 211, '2021-09-25 09:01:00', '2021-09-25 09:01:50', '2021-09-25 09:28:00', 11, 32, 5),
(9013, 103, 212, '2021-09-26 08:01:00', '2021-09-26 08:03:00', '2021-09-26 08:27:00', 12, 31, 4),
(9023, 104, 213, '2021-09-26 08:01:00', null, '2021-09-26 08:27:00', null, null, null),
(9014, 104, 212, '2021-09-27 08:01:00', '2021-09-27 08:04:00', '2021-09-27 08:21:00', 11, 31, 5),
(9015, 105, 212, '2021-09-28 08:02:10', '2021-09-28 08:04:10', '2021-09-28 08:25:10', 12, 31, 4),
(9016, 106, 213, '2021-09-29 18:01:00', '2021-09-29 18:02:10', '2021-09-29 18:23:00', 11, 39, 4),
(9017, 107, 213, '2021-09-30 11:01:00', '2021-09-30 11:01:40', '2021-09-30 11:31:00', 11, 38, 5),
(9018, 108, 214, '2021-09-30 21:01:00', '2021-09-30 21:02:50', '2021-09-30 21:21:00', 14, 38, 5),
(9002, 102, 202, '2021-10-01 09:01:00', '2021-10-01 09:06:00', '2021-10-01 09:31:00', 10.0, 41.5, 5),
(9006, 106, 203, '2021-10-01 18:01:00', '2021-10-01 18:09:00', '2021-10-01 18:31:00', 8.0, 25.5, 4),
(9001, 101, 202, '2021-10-02 08:30:00', null, '2021-10-02 08:31:00', null, null, null),
(9007, 107, 203, '2021-10-02 11:01:00', '2021-10-02 11:07:00', '2021-10-02 11:31:00', 9.9, 30, 5),
(9008, 108, 204, '2021-10-02 21:01:00', '2021-10-02 21:10:00', '2021-10-02 21:31:00', 13.2, 38, 4),
(9003, 103, 202, '2021-10-02 08:01:00', '2021-10-02 08:15:00', '2021-10-02 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-10-03 08:01:00', '2021-10-03 08:13:00', '2021-10-03 08:31:00', 7.5, 22, 4),
(9009, 109, 204, '2021-10-03 18:01:00', null, '2021-10-03 18:51:00', null, null, null);
select
dt
,finish_num_7d
,cancel_num_7d
from (
select
dt
,round(sum(finish_num) over(order by dt rows 6 preceding)/7,2) finish_num_7d # 近7日日均订单完成量
,round(sum(cancel_num) over(order by dt rows 6 preceding)/7,2) cancel_num_7d # 近7日日均订单取消量
from (
# 每日的订单完成,取消情况
select
date(order_time) dt # 当有司机接单时,在打车订单表生成一条订单,填充order_time-接单时间及其左边的字段
,count(order_time) order_cnt # 每日总订单数量
,count(start_time) finish_num # 每日订单完成数量(当司机接上乘客时,填充订单表中该start_time-开始计费的上车时间)
,count(order_time)-count(start_time) cancel_num # 每日订单取消数量
from tb_get_car_order
group by date(order_time)
) a
) a1
where dt between '2021-10-01' and '2021-10-03' # 国庆头3天里
order by dt
;
5.工作日各时段叫车量、等待接单时间和调度时间
题目:统计周一到周五各时段的叫车量、平均等待接单时间和平均调度时间。全部以event_time-开始打车时间为时段划分依据,平均等待接单时间和平均调度时间均保留1位小数,平均调度时间仅计算完成了的订单,结果按叫车量升序排序。
--输入:
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage FLOAT COMMENT '行驶里程数',
fare FLOAT COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(107, '北京', '2021-09-20 11:00:00', '2021-09-20 11:00:30', 9017),
(108, '北京', '2021-09-20 21:00:00', '2021-09-20 21:00:40', 9008),
(108, '北京', '2021-09-20 18:59:30', '2021-09-20 19:01:00', 9018),
(102, '北京', '2021-09-21 08:59:00', '2021-09-21 09:01:00', 9002),
(106, '北京', '2021-09-21 17:58:00', '2021-09-21 18:01:00', 9006),
(103, '北京', '2021-09-22 07:58:00', '2021-09-22 08:01:00', 9003),
(104, '北京', '2021-09-23 07:59:00', '2021-09-23 08:01:00', 9004),
(103, '北京', '2021-09-24 19:59:20', '2021-09-24 20:01:00', 9019),
(101, '北京', '2021-09-24 08:28:10', '2021-09-24 08:30:00', 9011);
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9017, 107, 213, '2021-09-20 11:00:30', '2021-09-20 11:02:10', '2021-09-20 11:31:00', 11, 38, 5),
(9008, 108, 204, '2021-09-20 21:00:40', '2021-09-20 21:03:00', '2021-09-20 21:31:00', 13.2, 38, 4),
(9018, 108, 214, '2021-09-20 19:01:00', '2021-09-20 19:04:50', '2021-09-20 19:21:00', 14, 38, 5),
(9002, 102, 202, '2021-09-21 09:01:00', '2021-09-21 09:06:00', '2021-09-21 09:31:00', 10.0, 41.5, 5),
(9006, 106, 203, '2021-09-21 18:01:00', '2021-09-21 18:09:00', '2021-09-21 18:31:00', 8.0, 25.5, 4),
(9007, 107, 203, '2021-09-22 11:01:00', '2021-09-22 11:07:00', '2021-09-22 11:31:00', 9.9, 30, 5),
(9003, 103, 202, '2021-09-22 08:01:00', '2021-09-22 08:15:00', '2021-09-22 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-09-23 08:01:00', '2021-09-23 08:13:00', '2021-09-23 08:31:00', 7.5, 22, 4),
(9005, 105, 202, '2021-09-23 10:01:00', '2021-09-23 10:13:00', '2021-09-23 10:31:00', 9, 29, 5),
(9019, 103, 202, '2021-09-24 20:01:00', '2021-09-24 20:11:00', '2021-09-24 20:51:00', 10, 39, 4),
(9011, 101, 211, '2021-09-24 08:30:00', '2021-09-24 08:31:00', '2021-09-24 08:54:00', 10, 35, 5);
select
period
,count(event_time) get_car_num # 叫车量
,round(avg(wait_time)/60,1) avg_wait_time # 平均等待接单时间(分钟)
,round(avg(dispatch_time)/60,1) avg_dispatch_time # 平均调度时间(分钟)
from (
select
a.order_id
,a.event_time # 打车时间
,a.period
,b.order_time # 接单时间
,b.start_time # 开始计费的上车时间
,timestampdiff(second,a.event_time,b.order_time) wait_time # 从开始打车到司机接单为等待接单时间(秒)
,timestampdiff(second,b.order_time,b.start_time) dispatch_time # 从司机接单到上车为调度时间(秒)
from (
select
order_id
,event_time
,case when date_format(event_time,'%H:%i:%S')>='07:00:00' and date_format(event_time,'%H:%i:%S')<'09:00:00' then '早高峰'
when date_format(event_time,'%H:%i:%S')>='09:00:00' and date_format(event_time,'%H:%i:%S')<'17:00:00' then '工作时间'
when date_format(event_time,'%H:%i:%S')>='17:00:00' and date_format(event_time,'%H:%i:%S')<'20:00:00' then '晚高峰'
when date_format(event_time,'%H:%i:%S')>='20:00:00' or date_format(event_time,'%H:%i:%S')<'07:00:00' then '休息时间'
else null end period # 各时段:%H 小时(00-23)、%i 分钟(00-59)、%S/s秒(00-59)
from tb_get_car_record
where date_format(event_time,'%w')not in ('0','6') # %w 周的天 (0=星期日, 6=星期六),周一到周五
) a join (
select
order_id
,order_time
,start_time
from tb_get_car_order
) b on a.order_id=b.order_id
) a1
group by period
order by get_car_num
;
6.各城市最大同时等车人数
题目:请统计各个城市在2021年10月期间,单日中最大的同时等车人数。
DROP TABLE IF EXISTS tb_get_car_record,tb_get_car_order;
CREATE TABLE tb_get_car_record (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
city VARCHAR(10) NOT NULL COMMENT '城市',
event_time datetime COMMENT '打车时间',
end_time datetime COMMENT '打车结束时间',
order_id INT COMMENT '订单号'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_get_car_order (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
order_id INT NOT NULL COMMENT '订单号',
uid INT NOT NULL COMMENT '用户ID',
driver_id INT NOT NULL COMMENT '司机ID',
order_time datetime COMMENT '接单时间',
start_time datetime COMMENT '开始计费的上车时间',
finish_time datetime COMMENT '订单结束时间',
mileage FLOAT COMMENT '行驶里程数',
fare FLOAT COMMENT '费用',
grade TINYINT COMMENT '评分'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_get_car_record(uid, city, event_time, end_time, order_id) VALUES
(108, '北京', '2021-10-20 08:00:00', '2021-10-20 08:00:40', 9008),
(108, '北京', '2021-10-20 08:00:10', '2021-10-20 08:00:45', 9018),
(102, '北京', '2021-10-20 08:00:30', '2021-10-20 08:00:50', 9002),
(106, '北京', '2021-10-20 08:05:41', '2021-10-20 08:06:00', 9006),
(103, '北京', '2021-10-20 08:05:50', '2021-10-20 08:07:10', 9003),
(104, '北京', '2021-10-20 08:01:01', '2021-10-20 08:01:20', 9004),
(103, '北京', '2021-10-20 08:01:15', '2021-10-20 08:01:30', 9019),
(101, '北京', '2021-10-20 08:28:10', '2021-10-20 08:30:00', 9011);
INSERT INTO tb_get_car_order(order_id, uid, driver_id, order_time, start_time, finish_time, mileage, fare, grade) VALUES
(9008, 108, 204, '2021-10-20 08:00:40', '2021-10-20 08:03:00', '2021-10-20 08:31:00', 13.2, 38, 4),
(9018, 108, 214, '2021-10-20 08:00:45', '2021-10-20 08:04:50', '2021-10-20 08:21:00', 14, 38, 5),
(9002, 102, 202, '2021-10-20 08:00:50', '2021-10-20 08:06:00', '2021-10-20 08:31:00', 10.0, 41.5, 5),
(9006, 106, 203, '2021-10-20 08:06:00', '2021-10-20 08:09:00', '2021-10-20 08:31:00', 8.0, 25.5, 4),
(9003, 103, 202, '2021-10-20 08:07:10', '2021-10-20 08:15:00', '2021-10-20 08:31:00', 11.0, 41.5, 4),
(9004, 104, 202, '2021-10-20 08:01:20', '2021-10-20 08:13:00', '2021-10-20 08:31:00', 7.5, 22, 4),
(9019, 103, 202, '2021-10-20 08:01:30', '2021-10-20 08:11:00', '2021-10-20 08:51:00', 10, 39, 4),
(9011, 101, 211, '2021-10-20 08:30:00', '2021-10-20 08:31:00', '2021-10-20 08:54:00', 10, 35, 5);
with t1 as (
select
a.uid
,a.city
,a.event_time # 打车时间
,a.end_time # 打车结束时间
,a.order_id
,b.order_time # 接单时间
,b.start_time # 开始计费的上车时间
,b.finish_time # 订单完成时间
from tb_get_car_record a
left join tb_get_car_order b
on a.order_id=b.order_id
where substr(a.event_time,1,7)='2021-10' # 2021年10月期间
)
select
city
,max(num) max_wait_uv
from(
select
city
,uid
,time
,sum(tag) over(partition by city order by time,tag desc) num # 使用窗口函数对每个城市的等车状态进行累加(如果同一时刻有人停止等车,有人开始等车,等车人数记作先增加后减少)
from (
# 对用户进入和离开的时间进行定义
select
uid
,city
,event_time time # 进入时间:开始打车的时间即为等车开始,记录event_time
,1 tag
from t1
union all
select
uid
,city
,end_time time # 离开时间1:在发出打车指令后,司机接单前取消,则没有生成order_id,这种情况 order_id is null,记录end_time
,-1 tag
from t1
where order_time is null
union all
select
uid
,city
,finish_time time # 离开时间2:在司机接单后,尚未抵达(用户未上车)前,用户或司机取消订单,start_time is null,记录finish_time
,-1 tag
from t1
where start_time is null
union all
select
uid
,city
,start_time time # 离开时间3:,结束等待,记录start_time,start_time is not null
,-1 tag
from t1
where start_time is not null
# 状态2和3可以直接使用ifnull()合并,ifnull(start_time,finish_time) 如果start_time空则返回finish_time,不空则start_time
) a
) a1
group by city
order by max_wait_uv,city # 结果按各城市最大等车人数升序排序,相同时按城市升序排序
;
2021年10月,每个城市用户进入和离开时间详情:
2021年10月,每个城市的瞬时等车状态情况:
五、某宝店铺分析(电商模式)
1.某宝店铺的SPU数量
题目:11月结束后,小牛同学需要对其在某宝的网店就11月份用户交易情况和产品情况进行分析以更好的经营小店。请你统计每款的SPU(货号)数量,并按SPU数量降序排序
--输入:
drop table if exists product_tb;
CREATE TABLE product_tb(
item_id char(10) NOT NULL,
style_id char(10) NOT NULL,
tag_price int(10) NOT NULL,
inventory int(10) NOT NULL
);
INSERT INTO product_tb VALUES('A001', 'A', 100, 20);
INSERT INTO product_tb VALUES('A002', 'A', 120, 30);
INSERT INTO product_tb VALUES('A003', 'A', 200, 15);
INSERT INTO product_tb VALUES('B001', 'B', 130, 18);
INSERT INTO product_tb VALUES('B002', 'B', 150, 22);
INSERT INTO product_tb VALUES('B003', 'B', 125, 10);
INSERT INTO product_tb VALUES('B004', 'B', 155, 12);
INSERT INTO product_tb VALUES('C001', 'C', 260, 25);
INSERT INTO product_tb VALUES('C002', 'C', 280, 18);
select
style_id
,count(item_id) SPU_num
from product_tb
group by style_id
order by SPU_num desc
;
2.某宝店铺的实际销售额与客单价
题目:11月结束后,小牛同学需要对其在某宝的网店就11月份用户交易情况和产品情况进行分析以更好的经营小店。请你统计实际总销售额与客单价(人均付费,总收入/总用户数,结果保留两位小数),
--输入:
drop table if exists sales_tb;
CREATE TABLE sales_tb(
sales_date date NOT NULL,
user_id int(10) NOT NULL,
item_id char(10) NOT NULL,
sales_num int(10) NOT NULL,
sales_price int(10) NOT NULL
);
INSERT INTO sales_tb VALUES('2021-11-1', 1, 'A001', 1, 90);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'A002', 2, 220);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-2', 3, 'C001', 2, 500);
INSERT INTO sales_tb VALUES('2021-11-2', 4, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-3', 5, 'C001', 1, 240);
INSERT INTO sales_tb VALUES('2021-11-3', 6, 'C002', 1, 270);
INSERT INTO sales_tb VALUES('2021-11-4', 7, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-4', 8, 'B002', 1, 140);
INSERT INTO sales_tb VALUES('2021-11-4', 9, 'B001', 1, 125);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B004', 1, 150);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-6', 11, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-6', 10, 'B004', 1, 150);
select
sum(sales_price) sales_total
,round(sum(sales_price)/count(distinct user_id),2) per_trans
from sales_tb
;
3.某宝店铺折扣率
题目:11月结束后,小牛同学需要对其在某宝的网店就11月份用户交易情况和产品情况进行分析以更好的经营小店。请你统计折扣率(GMV/吊牌金额,GMV指的是成交金额)
--输入:
drop table if exists product_tb;
CREATE TABLE product_tb(
item_id char(10) NOT NULL,
style_id char(10) NOT NULL,
tag_price int(10) NOT NULL,
inventory int(10) NOT NULL
);
INSERT INTO product_tb VALUES('A001', 'A', 100, 20);
INSERT INTO product_tb VALUES('A002', 'A', 120, 30);
INSERT INTO product_tb VALUES('A003', 'A', 200, 15);
INSERT INTO product_tb VALUES('B001', 'B', 130, 18);
INSERT INTO product_tb VALUES('B002', 'B', 150, 22);
INSERT INTO product_tb VALUES('B003', 'B', 125, 10);
INSERT INTO product_tb VALUES('B004', 'B', 155, 12);
INSERT INTO product_tb VALUES('C001', 'C', 260, 25);
INSERT INTO product_tb VALUES('C002', 'C', 280, 18);
drop table if exists sales_tb;
CREATE TABLE sales_tb(
sales_date date NOT NULL,
user_id int(10) NOT NULL,
item_id char(10) NOT NULL,
sales_num int(10) NOT NULL,
sales_price int(10) NOT NULL
);
INSERT INTO sales_tb VALUES('2021-11-1', 1, 'A001', 1, 90);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'A002', 2, 220);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-2', 3, 'C001', 2, 500);
INSERT INTO sales_tb VALUES('2021-11-2', 4, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-3', 5, 'C001', 1, 240);
INSERT INTO sales_tb VALUES('2021-11-3', 6, 'C002', 1, 270);
INSERT INTO sales_tb VALUES('2021-11-4', 7, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-4', 8, 'B002', 1, 140);
INSERT INTO sales_tb VALUES('2021-11-4', 9, 'B001', 1, 125);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B004', 1, 150);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-6', 11, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-6', 10, 'B004', 1, 150);
select
round(sum(sales_price)/sum(tag_price_total)*100,2) 'discount_rate(%)' # 折扣率(%)
from (
select
b.sales_date # 销售日期
,b.item_id # 货号
,b.sales_price # 结算金额
,b.sales_num # 销售数量
,a.tag_price*b.sales_num tag_price_total # 总标签价格
,a.inventory # 库存量
from (
select
sales_date
,item_id
,sales_price
,sales_num
from sales_tb
) b left join (
select
item_id
,tag_price
,inventory
from product_tb
) a on a.item_id=b.item_id
) a1
;
4.某宝店铺动销率与售罄率
题目:11月结束后,小牛同学需要对其在某宝的网店就11月份用户交易情况和产品情况进行分析以更好的经营小店。请你统计每款的动销率(pin_rate,有销售的SKU数量/在售SKU数量)与售罄率(sell-through_rate,GMV/备货值,备货值=吊牌价*库存数),按style_id升序排序。
--输入:
drop table if exists product_tb;
CREATE TABLE product_tb(
item_id char(10) NOT NULL,
style_id char(10) NOT NULL,
tag_price int(10) NOT NULL,
inventory int(10) NOT NULL
);
INSERT INTO product_tb VALUES('A001', 'A', 100, 20);
INSERT INTO product_tb VALUES('A002', 'A', 120, 30);
INSERT INTO product_tb VALUES('A003', 'A', 200, 15);
INSERT INTO product_tb VALUES('B001', 'B', 130, 18);
INSERT INTO product_tb VALUES('B002', 'B', 150, 22);
INSERT INTO product_tb VALUES('B003', 'B', 125, 10);
INSERT INTO product_tb VALUES('B004', 'B', 155, 12);
INSERT INTO product_tb VALUES('C001', 'C', 260, 25);
INSERT INTO product_tb VALUES('C002', 'C', 280, 18);
drop table if exists sales_tb;
CREATE TABLE sales_tb(
sales_date date NOT NULL,
user_id int(10) NOT NULL,
item_id char(10) NOT NULL,
sales_num int(10) NOT NULL,
sales_price int(10) NOT NULL
);
INSERT INTO sales_tb VALUES('2021-11-1', 1, 'A001', 1, 90);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'A002', 2, 220);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-2', 3, 'C001', 2, 500);
INSERT INTO sales_tb VALUES('2021-11-2', 4, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-3', 5, 'C001', 1, 240);
INSERT INTO sales_tb VALUES('2021-11-3', 6, 'C002', 1, 270);
INSERT INTO sales_tb VALUES('2021-11-4', 7, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-4', 8, 'B002', 1, 140);
INSERT INTO sales_tb VALUES('2021-11-4', 9, 'B001', 1, 125);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B004', 1, 150);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-6', 11, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-6', 10, 'B004', 1, 150);
select
style_id
,round(sum(sales_num)/sum(inventory-sales_num)*100,2) 'pin_rate(%)' # 动销率=有销售的SKU数量/在售SKU数量
,round(sum(sales_price)/sum(tag_price_total)*100,2) 'sell-through_rate(%)' # 售罄率=GMV/备货值,备货值=吊牌价*库存数
from (
select
item_id
,style_id
,tag_price # 吊牌价
,inventory # 库存数
,tag_price*inventory tag_price_total # 备货值=吊牌价*库存数
from product_tb
) a left join (
select
item_id
,sum(sales_price) sales_price # 结算金额
,sum(sales_num) sales_num # 有销售的数量
from sales_tb
group by item_id
) b on a.item_id=b.item_id
group by style_id
order by style_id
;
5.某宝店铺连续2天及以上购物的用户及其对应的天数
题目:11月结束后,小牛同学需要对其在某宝的网店就11月份用户交易情况和产品情况进行分析以更好的经营小店。请你统计连续2天及以上在该店铺购物的用户及其对应的次数(若有多个用户,按user_id升序排序)。
--输入:
drop table if exists sales_tb;
CREATE TABLE sales_tb(
sales_date date NOT NULL,
user_id int(10) NOT NULL,
item_id char(10) NOT NULL,
sales_num int(10) NOT NULL,
sales_price int(10) NOT NULL
);
INSERT INTO sales_tb VALUES('2021-11-1', 1, 'A001', 1, 90);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'A002', 2, 220);
INSERT INTO sales_tb VALUES('2021-11-1', 2, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-2', 3, 'C001', 2, 500);
INSERT INTO sales_tb VALUES('2021-11-2', 4, 'B001', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-3', 5, 'C001', 1, 240);
INSERT INTO sales_tb VALUES('2021-11-3', 6, 'C002', 1, 270);
INSERT INTO sales_tb VALUES('2021-11-4', 7, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-4', 8, 'B002', 1, 140);
INSERT INTO sales_tb VALUES('2021-11-4', 9, 'B001', 1, 125);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'B004', 1, 150);
INSERT INTO sales_tb VALUES('2021-11-5', 10, 'A003', 1, 180);
INSERT INTO sales_tb VALUES('2021-11-6', 11, 'B003', 1, 120);
INSERT INTO sales_tb VALUES('2021-11-6', 10, 'B004', 1, 150);
with t1 as (
select
user_id
from (
select
user_id
,sales_date
,rn
,diff_date
,dense_rank() over(partition by user_id,diff_date order by sales_date) rnn # 判断连续日期,对每个用户的连续购买日期进行聚类并统计,如果统计后天数大于等于2则为连续用户
from (
select
user_id
,sales_date
,rn
,date_sub(sales_date,interval rn day) diff_date # 使用date_sub()函数并聚合后判断日期连续,如果sales_date 减去rn(天)是相同的日期,则说明他们这些天都是连续的日期(所以要使用dense_rank()并且对日期进行去重,不然重复的日期也会被记作连续的天数)。
from (
select distinct # 防止一天有多次购买记录,distinct去重
user_id
,sales_date
,dense_rank() over(partition by user_id order by sales_date) rn # 首先对日期进行排序
from sales_tb
) a
) a1
) a2
where rnn>=2
group by user_id
)
select
user_id
,count(distinct sales_date) days_count
from sales_tb
where user_id in (select user_id from t1)
group by user_id
order by user_id
;
六、牛客直播课分析(在线教育行业)
1.牛客直播转换率
题目:牛客某页面推出了数据分析系列直播课程介绍。用户可以选择报名任意一场或多场直播课。请你统计每个科目的转换率(sign_rate(%),转化率=报名人数/浏览人数,结果保留两位小数)。
--输入:
drop table if exists course_tb;
CREATE TABLE course_tb(
course_id int(10) NOT NULL,
course_name char(10) NOT NULL,
course_datetime char(30) NOT NULL);
INSERT INTO course_tb VALUES(1, 'Python', '2021-12-1 19:00-21:00');
INSERT INTO course_tb VALUES(2, 'SQL', '2021-12-2 19:00-21:00');
INSERT INTO course_tb VALUES(3, 'R', '2021-12-3 19:00-21:00');
drop table if exists behavior_tb;
CREATE TABLE behavior_tb(
user_id int(10) NOT NULL,
if_vw int(10) NOT NULL,
if_fav int(10) NOT NULL,
if_sign int(10) NOT NULL,
course_id int(10) NOT NULL);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 3);
INSERT INTO behavior_tb VALUES(101, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(101, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(101, 1, 0, 0, 3);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 3);
INSERT INTO behavior_tb VALUES(103, 1, 1, 0, 1);
INSERT INTO behavior_tb VALUES(103, 1, 0, 0, 2);
INSERT INTO behavior_tb VALUES(103, 1, 0, 0, 3);
INSERT INTO behavior_tb VALUES(104, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(104, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(104, 1, 1, 0, 3);
INSERT INTO behavior_tb VALUES(105, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(106, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(107, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(107, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(108, 1, 1, 1, 3);
select
a.course_id
,a.course_name
,round(sum(if_sign)/sum(if_vw)*100,2) 'sign_rate(%)'
from (
select
course_id
,course_name
from course_tb
) a left join (
select
user_id
,if_sign # 是否报名
,if_vw # 是否浏览
,course_id
from behavior_tb
) b on a.course_id=b.course_id
group by a.course_id,a.course_name
order by a.course_id
;
2.牛客直播开始时各直播间在线人数
题目:牛客某页面推出了数据分析系列直播课程介绍。用户可以选择报名任意一场或多场直播课。请你统计直播开始时(19:00),各科目的在线人数,以上例子的输出结果为(按照course_id升序排序)。
--输入:
CREATE TABLE course_tb(
course_id int(10) NOT NULL,
course_name char(10) NOT NULL,
course_datetime char(30) NOT NULL);
INSERT INTO course_tb VALUES(1, 'Python', '2021-12-1 19:00-21:00');
INSERT INTO course_tb VALUES(2, 'SQL', '2021-12-2 19:00-21:00');
INSERT INTO course_tb VALUES(3, 'R', '2021-12-3 19:00-21:00');
CREATE TABLE attend_tb(
user_id int(10) NOT NULL,
course_id int(10) NOT NULL,
in_datetime datetime NOT NULL,
out_datetime datetime NOT NULL
);
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:00:00', '2021-12-1 19:28:00');
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:30:00', '2021-12-1 19:53:00');
INSERT INTO attend_tb VALUES(101, 1, '2021-12-1 19:00:00', '2021-12-1 20:55:00');
INSERT INTO attend_tb VALUES(102, 1, '2021-12-1 19:00:00', '2021-12-1 19:05:00');
INSERT INTO attend_tb VALUES(104, 1, '2021-12-1 19:00:00', '2021-12-1 20:59:00');
INSERT INTO attend_tb VALUES(101, 2, '2021-12-2 19:05:00', '2021-12-2 20:58:00');
INSERT INTO attend_tb VALUES(102, 2, '2021-12-2 18:55:00', '2021-12-2 21:00:00');
INSERT INTO attend_tb VALUES(104, 2, '2021-12-2 18:57:00', '2021-12-2 20:56:00');
INSERT INTO attend_tb VALUES(107, 2, '2021-12-2 19:10:00', '2021-12-2 19:18:00');
INSERT INTO attend_tb VALUES(100, 3, '2021-12-3 19:01:00', '2021-12-3 21:00:00');
INSERT INTO attend_tb VALUES(102, 3, '2021-12-3 18:58:00', '2021-12-3 19:05:00');
INSERT INTO attend_tb VALUES(108, 3, '2021-12-3 19:01:00', '2021-12-3 19:56:00');
select
a.course_id
,b.course_name
,count(distinct a.user_id) online_num
from (
select
user_id
,course_id
,in_datetime
from attend_tb
) a left join (
select
course_id
,course_name
from course_tb
) b on a.course_id=b.course_id
where substr(a.in_datetime,12,8)<='19:00:00' # 直播开始时(19:00)
group by a.course_id,b.course_name
order by a.course_id
;
3.牛客直播各科目平均观看时长
题目:牛客某页面推出了数据分析系列直播课程介绍。用户可以选择报名任意一场或多场直播课。请你统计每个科目的平均观看时长(观看时长定义为离开直播间的时间与进入直播间的时间之差,单位是分钟),输出结果按平均观看时长降序排序,结果保留两位小数。
--输入:
drop table if exists course_tb;
CREATE TABLE course_tb(
course_id int(10) NOT NULL,
course_name char(10) NOT NULL,
course_datetime char(30) NOT NULL);
INSERT INTO course_tb VALUES(1, 'Python', '2021-12-1 19:00-21:00');
INSERT INTO course_tb VALUES(2, 'SQL', '2021-12-2 19:00-21:00');
INSERT INTO course_tb VALUES(3, 'R', '2021-12-3 19:00-21:00');
drop table if exists attend_tb;
CREATE TABLE attend_tb(
user_id int(10) NOT NULL,
course_id int(10) NOT NULL,
in_datetime datetime NOT NULL,
out_datetime datetime NOT NULL
);
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:00:00', '2021-12-1 19:28:00');
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:30:00', '2021-12-1 19:53:00');
INSERT INTO attend_tb VALUES(101, 1, '2021-12-1 19:00:00', '2021-12-1 20:55:00');
INSERT INTO attend_tb VALUES(102, 1, '2021-12-1 19:00:00', '2021-12-1 19:05:00');
INSERT INTO attend_tb VALUES(104, 1, '2021-12-1 19:00:00', '2021-12-1 20:59:00');
INSERT INTO attend_tb VALUES(101, 2, '2021-12-2 19:05:00', '2021-12-2 20:58:00');
INSERT INTO attend_tb VALUES(102, 2, '2021-12-2 18:55:00', '2021-12-2 21:00:00');
INSERT INTO attend_tb VALUES(104, 2, '2021-12-2 18:57:00', '2021-12-2 20:56:00');
INSERT INTO attend_tb VALUES(107, 2, '2021-12-2 19:10:00', '2021-12-2 19:18:00');
INSERT INTO attend_tb VALUES(100, 3, '2021-12-3 19:01:00', '2021-12-3 21:00:00');
INSERT INTO attend_tb VALUES(102, 3, '2021-12-3 18:58:00', '2021-12-3 19:05:00');
INSERT INTO attend_tb VALUES(108, 3, '2021-12-3 19:01:00', '2021-12-3 19:56:00');
select
b.course_name
,round(avg(a.diff),2) avg_Len # 每个科目的平均观看时长
from (
select
course_id
,in_datetime
,out_datetime
,timestampdiff(minute,in_datetime,out_datetime) diff # 每个课程的观看时长
from attend_tb
) a left join (
select
course_id
,course_name
from course_tb
) b on a.course_id=b.course_id
group by b.course_name
order by avg_Len desc
;
4.牛客直播各科目出勤率
题目:牛客某页面推出了数据分析系列直播课程介绍。用户可以选择报名任意一场或多场直播课。请你统计每个科目的出勤率(attend_rate(%),结果保留两位小数),出勤率=出勤(在线时长10分钟及以上)人数 / 报名人数,输出结果按course_id升序排序,以上数据的输出结果如下。
--输入:
drop table if exists course_tb;
CREATE TABLE course_tb(
course_id int(10) NOT NULL,
course_name char(10) NOT NULL,
course_datetime char(30) NOT NULL);
INSERT INTO course_tb VALUES(1, 'Python', '2021-12-1 19:00-21:00');
INSERT INTO course_tb VALUES(2, 'SQL', '2021-12-2 19:00-21:00');
INSERT INTO course_tb VALUES(3, 'R', '2021-12-3 19:00-21:00');
drop table if exists behavior_tb;
CREATE TABLE behavior_tb(
user_id int(10) NOT NULL,
if_vw int(10) NOT NULL,
if_fav int(10) NOT NULL,
if_sign int(10) NOT NULL,
course_id int(10) NOT NULL);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(100, 1, 1, 1, 3);
INSERT INTO behavior_tb VALUES(101, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(101, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(101, 1, 0, 0, 3);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(102, 1, 1, 1, 3);
INSERT INTO behavior_tb VALUES(103, 1, 1, 0, 1);
INSERT INTO behavior_tb VALUES(103, 1, 0, 0, 2);
INSERT INTO behavior_tb VALUES(103, 1, 0, 0, 3);
INSERT INTO behavior_tb VALUES(104, 1, 1, 1, 1);
INSERT INTO behavior_tb VALUES(104, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(104, 1, 1, 0, 3);
INSERT INTO behavior_tb VALUES(105, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(106, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(107, 1, 0, 0, 1);
INSERT INTO behavior_tb VALUES(107, 1, 1, 1, 2);
INSERT INTO behavior_tb VALUES(108, 1, 1, 1, 3);
drop table if exists attend_tb;
CREATE TABLE attend_tb(
user_id int(10) NOT NULL,
course_id int(10) NOT NULL,
in_datetime datetime NOT NULL,
out_datetime datetime NOT NULL
);
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:00:00', '2021-12-1 19:28:00');
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:30:00', '2021-12-1 19:53:00');
INSERT INTO attend_tb VALUES(101, 1, '2021-12-1 19:00:00', '2021-12-1 20:55:00');
INSERT INTO attend_tb VALUES(102, 1, '2021-12-1 19:00:00', '2021-12-1 19:05:00');
INSERT INTO attend_tb VALUES(104, 1, '2021-12-1 19:00:00', '2021-12-1 20:59:00');
INSERT INTO attend_tb VALUES(101, 2, '2021-12-2 19:05:00', '2021-12-2 20:58:00');
INSERT INTO attend_tb VALUES(102, 2, '2021-12-2 18:55:00', '2021-12-2 21:00:00');
INSERT INTO attend_tb VALUES(104, 2, '2021-12-2 18:57:00', '2021-12-2 20:56:00');
INSERT INTO attend_tb VALUES(107, 2, '2021-12-2 19:10:00', '2021-12-2 19:18:00');
INSERT INTO attend_tb VALUES(100, 3, '2021-12-3 19:01:00', '2021-12-3 21:00:00');
INSERT INTO attend_tb VALUES(102, 3, '2021-12-3 18:58:00', '2021-12-3 19:05:00');
INSERT INTO attend_tb VALUES(108, 3, '2021-12-3 19:01:00', '2021-12-3 19:56:00');
select
a.course_id
,b.course_name
,round(max(attend_num)/count(distinct if(if_sign='1',a.user_id,null))*100,2) 'attend_rate(%)' # 出勤率=出勤(在线时长10分钟及以上)人数 / 报名人数
from (
select
user_id
,course_id
,if_sign # 是否报名
from behavior_tb
) a left join (
select
course_id
,course_name
from course_tb
) b on a.course_id=b.course_id
left join (
select
course_id
,count(distinct user_id) attend_num # 出勤人数
from (
select
user_id
,course_id
,timestampdiff(minute,in_datetime,out_datetime) diff
from attend_tb
where timestampdiff(minute,in_datetime,out_datetime)>=10 # 在线时长10分钟及以上
order by user_id
) cc
group by course_id
) c on a.course_id=c.course_id
group by a.course_id,b.course_name
order by course_id
;
5.牛客直播各科目同时在线人数
题目:牛客某页面推出了数据分析系列直播课程介绍。用户可以选择报名任意一场或多场直播课。请你统计每个科目最大同时在线人数(按course_id排序)。
--输入:
drop table if exists course_tb;
CREATE TABLE course_tb(
course_id int(10) NOT NULL,
course_name char(10) NOT NULL,
course_datetime char(30) NOT NULL);
INSERT INTO course_tb VALUES(1, 'Python', '2021-12-1 19:00-21:00');
INSERT INTO course_tb VALUES(2, 'SQL', '2021-12-2 19:00-21:00');
INSERT INTO course_tb VALUES(3, 'R', '2021-12-3 19:00-21:00');
drop table if exists attend_tb;
CREATE TABLE attend_tb(
user_id int(10) NOT NULL,
course_id int(10) NOT NULL,
in_datetime datetime NOT NULL,
out_datetime datetime NOT NULL
);
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:00:00', '2021-12-1 19:28:00');
INSERT INTO attend_tb VALUES(100, 1, '2021-12-1 19:30:00', '2021-12-1 19:53:00');
INSERT INTO attend_tb VALUES(101, 1, '2021-12-1 19:00:00', '2021-12-1 20:55:00');
INSERT INTO attend_tb VALUES(102, 1, '2021-12-1 19:00:00', '2021-12-1 19:05:00');
INSERT INTO attend_tb VALUES(104, 1, '2021-12-1 19:00:00', '2021-12-1 20:59:00');
INSERT INTO attend_tb VALUES(101, 2, '2021-12-2 19:05:00', '2021-12-2 20:58:00');
INSERT INTO attend_tb VALUES(102, 2, '2021-12-2 18:55:00', '2021-12-2 21:00:00');
INSERT INTO attend_tb VALUES(104, 2, '2021-12-2 18:57:00', '2021-12-2 20:56:00');
INSERT INTO attend_tb VALUES(107, 2, '2021-12-2 19:10:00', '2021-12-2 19:18:00');
INSERT INTO attend_tb VALUES(100, 3, '2021-12-3 19:01:00', '2021-12-3 21:00:00');
INSERT INTO attend_tb VALUES(102, 3, '2021-12-3 18:58:00', '2021-12-3 19:05:00');
INSERT INTO attend_tb VALUES(108, 3, '2021-12-3 19:01:00', '2021-12-3 19:56:00');
select
course_id
,course_name
,max(user_num) max_num # 每个科目最大同时在线人数
from(
select
course_id
,course_name
,user_id
,time
,tag
,sum(tag) over(partition by course_id,course_name order by time,tag desc) user_num # 每个时刻在线人数(若同一时刻既有进又有出,按照先进后出计算)
from (
select
a.course_id
,b.course_name
,a.user_id
,a.in_datetime time
,1 tag # 进入直播间
from (
select
user_id
,course_id
,in_datetime
from attend_tb
) a left join (
select
course_id
,course_name
from course_tb
) b on a.course_id=b.course_id
union all
select
a.course_id
,b.course_name
,a.user_id
,a.out_datetime time
,-1 tag # 离开直播间
from (
select
user_id
,course_id
,out_datetime
from attend_tb
) a left join (
select
course_id
,course_name
from course_tb
) b on a.course_id=b.course_id
-- order by course_id,course_name,user_id,time
) a1
) a2
group by course_id,course_name
order by course_id
;
七、某乎问答(内容行业)
1.某乎问答11月份日人均回答量
题目:请你统计11月份日人均回答量(回答问题数量/答题人数),按回答日期排序,结果保留两位小数
--输入:
drop table if exists answer_tb;
CREATE TABLE answer_tb(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E001' ,150);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E002', 200);
INSERT INTO answer_tb VALUES('2021-11-1',102, 'C003' ,50);
INSERT INTO answer_tb VALUES('2021-11-1' ,103, 'P001', 35);
INSERT INTO answer_tb VALUES('2021-11-1', 104, 'C003', 120);
INSERT INTO answer_tb VALUES('2021-11-1' ,105, 'P001', 125);
INSERT INTO answer_tb VALUES('2021-11-1' , 102, 'P002', 105);
INSERT INTO answer_tb VALUES('2021-11-2', 101, 'P001' ,201);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C002', 200);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C001', 225);
INSERT INTO answer_tb VALUES('2021-11-2' , 110, 'C002', 220);
INSERT INTO answer_tb VALUES('2021-11-3', 101, 'C002', 180);
INSERT INTO answer_tb VALUES('2021-11-4' ,109, 'E003', 130);
INSERT INTO answer_tb VALUES('2021-11-4', 109, 'E001',123);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C001',160);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C002', 120);
INSERT INTO answer_tb VALUES('2021-11-5', 110, 'P001', 180);
INSERT INTO answer_tb VALUES('2021-11-5' , 106, 'P002' , 45);
INSERT INTO answer_tb VALUES('2021-11-5' , 107, 'E003', 56);
select
answer_date
,round(count(issue_id)/count(distinct author_id),2) per_num # 日人均回答量=回答问题数量/答题人数
from answer_tb
where month(answer_date)='11' # 11月份
group by answer_date
order by answer_date
;
2.某乎问答高质量的回答中用户属于各级别的数量
题目:回答字数大于等于100字的认为是高质量回答,请你统计某乎问答高质量的回答中用户属于1-2级、3-4级、5-6级的数量分别是多少,按数量降序排列。
--输入:
drop table if exists author_tb;
CREATE TABLE author_tb(
author_id int(10) NOT NULL,
author_level int(10) NOT NULL,
sex char(10) NOT NULL);
INSERT INTO author_tb VALUES(101 , 6, 'm');
INSERT INTO author_tb VALUES(102 , 1, 'f');
INSERT INTO author_tb VALUES(103 , 1, 'm');
INSERT INTO author_tb VALUES(104 , 3, 'm');
INSERT INTO author_tb VALUES(105 , 4, 'f');
INSERT INTO author_tb VALUES(106 , 2, 'f');
INSERT INTO author_tb VALUES(107 , 2, 'm');
INSERT INTO author_tb VALUES(108 , 5, 'f');
INSERT INTO author_tb VALUES(109 , 6, 'f');
INSERT INTO author_tb VALUES(110 , 5, 'm');
drop table if exists answer_tb;
CREATE TABLE answer_tb(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E001' ,150);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E002', 200);
INSERT INTO answer_tb VALUES('2021-11-1',102, 'C003' ,50);
INSERT INTO answer_tb VALUES('2021-11-1' ,103, 'P001', 35);
INSERT INTO answer_tb VALUES('2021-11-1', 104, 'C003', 120);
INSERT INTO answer_tb VALUES('2021-11-1' ,105, 'P001', 125);
INSERT INTO answer_tb VALUES('2021-11-1' , 102, 'P002', 105);
INSERT INTO answer_tb VALUES('2021-11-2', 101, 'P001' ,201);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C002', 200);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C001', 225);
INSERT INTO answer_tb VALUES('2021-11-2' , 110, 'C002', 220);
INSERT INTO answer_tb VALUES('2021-11-3', 101, 'C002', 180);
INSERT INTO answer_tb VALUES('2021-11-4' ,109, 'E003', 130);
INSERT INTO answer_tb VALUES('2021-11-4', 109, 'E001',123);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C001',160);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C002', 120);
INSERT INTO answer_tb VALUES('2021-11-5', 110, 'P001', 180);
INSERT INTO answer_tb VALUES('2021-11-5' , 106, 'P002' , 45);
INSERT INTO answer_tb VALUES('2021-11-5' , 107, 'E003', 56);
select
level_cut
,count(a.author_id) num
from (
select
author_id
,author_level
,case when author_level<=2 then '1-2级'
when author_level<=4 then '3-4级'
when author_level<=6 then '5-6级'
else null end level_cut # 高质量的回答级别
from author_tb
) a join (
select
author_id
from answer_tb
where char_len>=100 # 回答字数大于等于100字
) b on a.author_id=b.author_id
group by level_cut
order by num desc
;
3.某乎问答单日回答问题数大于等于3个的所有用户
题目:请你统计11月份单日回答问题数大于等于3个的所有用户信息(author_date表示回答日期、author_id表示创作者id,answer_cnt表示回答问题个数)。
--输入:
drop table if exists answer_tb;
CREATE TABLE answer_tb(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E001' ,150);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E002', 200);
INSERT INTO answer_tb VALUES('2021-11-1',102, 'C003' ,50);
INSERT INTO answer_tb VALUES('2021-11-1' ,103, 'P001', 35);
INSERT INTO answer_tb VALUES('2021-11-1', 104, 'C003', 120);
INSERT INTO answer_tb VALUES('2021-11-1' ,105, 'P001', 125);
INSERT INTO answer_tb VALUES('2021-11-1' , 102, 'P002', 105);
INSERT INTO answer_tb VALUES('2021-11-2', 101, 'P001' ,201);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C002', 200);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C001', 225);
INSERT INTO answer_tb VALUES('2021-11-2' , 110, 'C002', 220);
INSERT INTO answer_tb VALUES('2021-11-3', 101, 'C002', 180);
INSERT INTO answer_tb VALUES('2021-11-4' ,109, 'E003', 130);
INSERT INTO answer_tb VALUES('2021-11-4', 109, 'E001',123);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C001',160);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C002', 120);
INSERT INTO answer_tb VALUES('2021-11-5', 110, 'P001', 180);
INSERT INTO answer_tb VALUES('2021-11-5' , 106, 'P002' , 45);
INSERT INTO answer_tb VALUES('2021-11-5' , 107, 'E003', 56);
select
answer_date
,author_id
,count(issue_id) answer_cnt
from answer_tb
where month(answer_date)='11' # 11月份
group by answer_date,author_id
having count(issue_id) >=3 # 单日回答问题数大于等于3个
order by answer_date,author_id
;
4.某乎问答回答过教育类问题的用户里有多少用户回答
题目:请你统计回答过教育类问题的用户里有多少用户回答过职场类问题。
--输入:
drop table if exists issue_tb;
CREATE TABLE issue_tb(
issue_id char(10) NOT NULL,
issue_type char(10) NOT NULL);
INSERT INTO issue_tb VALUES('E001' ,'Education');
INSERT INTO issue_tb VALUES('E002' ,'Education');
INSERT INTO issue_tb VALUES('E003' ,'Education');
INSERT INTO issue_tb VALUES('C001', 'Career');
INSERT INTO issue_tb VALUES('C002', 'Career');
INSERT INTO issue_tb VALUES('C003', 'Career');
INSERT INTO issue_tb VALUES('C004', 'Career');
INSERT INTO issue_tb VALUES('P001' ,'Psychology');
INSERT INTO issue_tb VALUES('P002' ,'Psychology');
drop table if exists answer_tb;
CREATE TABLE answer_tb(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E001' ,150);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E002', 200);
INSERT INTO answer_tb VALUES('2021-11-1',102, 'C003' ,50);
INSERT INTO answer_tb VALUES('2021-11-1' ,103, 'P001', 35);
INSERT INTO answer_tb VALUES('2021-11-1', 104, 'C003', 120);
INSERT INTO answer_tb VALUES('2021-11-1' ,105, 'P001', 125);
INSERT INTO answer_tb VALUES('2021-11-1' , 102, 'P002', 105);
INSERT INTO answer_tb VALUES('2021-11-2', 101, 'P001' ,201);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C002', 200);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C001', 225);
INSERT INTO answer_tb VALUES('2021-11-2' , 110, 'C002', 220);
INSERT INTO answer_tb VALUES('2021-11-3', 101, 'C002', 180);
INSERT INTO answer_tb VALUES('2021-11-4' ,109, 'E003', 130);
INSERT INTO answer_tb VALUES('2021-11-4', 109, 'E001',123);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C001',160);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C002', 120);
INSERT INTO answer_tb VALUES('2021-11-5', 110, 'P001', 180);
INSERT INTO answer_tb VALUES('2021-11-5' , 106, 'P002' , 45);
INSERT INTO answer_tb VALUES('2021-11-5' , 107, 'E003', 56);
-- 方法1
select
count(distinct author_id) num # 既回答过教育类问题又回答过职场类问题的用户
from issue_tb a
join answer_tb b
on a.issue_id=b.issue_id
where a.issue_type='Education' # 回答过教育类问题的用户
and author_id in (
select distinct
author_id
from issue_tb a
join answer_tb b
on a.issue_id=b.issue_id
where a.issue_type='Career' # 回答过职场类问题的用户
)
;
-- 方法2
select
count(author_id) num # 既回答过教育类问题又回答过职场类问题的用户数
from (
select
author_id
from (
select
author_id
,issue_id
from answer_tb
where issue_id in (
select
issue_id
from issue_tb
where issue_type in ('Education','Career') # 教育类问题和职场类问题
)
) a
group by author_id
having count(distinct substr(issue_id,1,1))=2 # 既回答过教育类问题又回答过职场类问题
) a1
;
5.某乎问答最大连续回答问题天数大于等于3天的用户
题目:请你统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序)。
--输入:
drop table if exists author_tb;
CREATE TABLE author_tb(
author_id int(10) NOT NULL,
author_level int(10) NOT NULL,
sex char(10) NOT NULL);
INSERT INTO author_tb VALUES(101 , 6, 'm');
INSERT INTO author_tb VALUES(102 , 1, 'f');
INSERT INTO author_tb VALUES(103 , 1, 'm');
INSERT INTO author_tb VALUES(104 , 3, 'm');
INSERT INTO author_tb VALUES(105 , 4, 'f');
INSERT INTO author_tb VALUES(106 , 2, 'f');
INSERT INTO author_tb VALUES(107 , 2, 'm');
INSERT INTO author_tb VALUES(108 , 5, 'f');
INSERT INTO author_tb VALUES(109 , 6, 'f');
INSERT INTO author_tb VALUES(110 , 5, 'm');
drop table if exists answer_tb;
CREATE TABLE answer_tb(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E001' ,150);
INSERT INTO answer_tb VALUES('2021-11-1', 101, 'E002', 200);
INSERT INTO answer_tb VALUES('2021-11-1',102, 'C003' ,50);
INSERT INTO answer_tb VALUES('2021-11-1' ,103, 'P001', 35);
INSERT INTO answer_tb VALUES('2021-11-1', 104, 'C003', 120);
INSERT INTO answer_tb VALUES('2021-11-1' ,105, 'P001', 125);
INSERT INTO answer_tb VALUES('2021-11-1' , 102, 'P002', 105);
INSERT INTO answer_tb VALUES('2021-11-2', 101, 'P001' ,201);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C002', 200);
INSERT INTO answer_tb VALUES('2021-11-2', 110, 'C001', 225);
INSERT INTO answer_tb VALUES('2021-11-2' , 110, 'C002', 220);
INSERT INTO answer_tb VALUES('2021-11-3', 101, 'C002', 180);
INSERT INTO answer_tb VALUES('2021-11-4' ,109, 'E003', 130);
INSERT INTO answer_tb VALUES('2021-11-4', 109, 'E001',123);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C001',160);
INSERT INTO answer_tb VALUES('2021-11-5', 108, 'C002', 120);
INSERT INTO answer_tb VALUES('2021-11-5', 110, 'P001', 180);
INSERT INTO answer_tb VALUES('2021-11-5' , 106, 'P002' , 45);
INSERT INTO answer_tb VALUES('2021-11-5' , 107, 'E003', 56);
with t1 as (
select
author_id
,max(rnn) days_cnt # 最大连续回答问题的天数
from (
select
author_id
,answer_date
,rn
,diff_date
,dense_rank() over(partition by author_id,diff_date order by answer_date) rnn
from (
select
author_id
,answer_date
,rn
,date_sub(answer_date,interval rn day) diff_date
from (
select distinct
author_id
,answer_date
,dense_rank() over(partition by author_id order by answer_date) rn
from answer_tb
) a
) a1
) a2
group by author_id
having max(rnn)>=3 # 最大连续回答问题的天数大于等于3天
)
select
a.author_id
,b.author_level
,a.days_cnt
from (
select
author_id
,days_cnt
from t1
) a join (
select
author_id
,author_level
from author_tb
) b on a.author_id=b.author_id
order by author_id
;