文章目录
SQL61 给出employees表中排名为奇数行的first_name
示例1
输入:
drop table if exists `employees` ;
CREATE TABLE `employees` (
`emp_no` int(11) NOT NULL,
`birth_date` date NOT NULL,
`first_name` varchar(14) NOT NULL,
`last_name` varchar(16) NOT NULL,
`gender` char(1) NOT NULL,
`hire_date` date NOT NULL,
PRIMARY KEY (`emp_no`));
INSERT INTO employees VALUES(10001,'1953-09-02','Georgi','Facello','M','1986-06-26');
INSERT INTO employees VALUES(10002,'1964-06-02','Bezalel','Simmel','F','1985-11-21');
INSERT INTO employees VALUES(10005,'1955-01-21','Kyoichi','Maliniak','M','1989-09-12');
INSERT INTO employees VALUES(10006,'1953-04-20','Anneke','Preusig','F','1989-06-02');
输出:
Georgi
Anneke
知识点:窗口函数row_number
题解:先用窗口函数row_number
筛选出按first_name
排序后,排在奇数位置的记录,取出这些记录的emp_no
,而后便可从原表中根据这些emp_no
按照原顺序拿到我们想要的结果了
select
first_name first
from
employees
where
emp_no in ( select emp_no from
( select emp_no, row_number() over(order by first_name) as r from employees) t where r % 2 = 1);
SQL62 出现三次以上相同积分的情况
示例1
输入:
drop table if exists grade;
CREATE TABLE `grade` (
`id` int(4) NOT NULL,
`number` int(4) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO grade VALUES
(1,111),
(2,333),
(3,111),
(4,111),
(5,333);
输出:
111
知识点:group by 和having
select
number
from
grade
group by
number
having
count(number) >= 3
order by
number;
SQL63 刷题通过的题目排名
示例1
输入:
drop table if exists passing_number;
CREATE TABLE `passing_number` (
`id` int(4) NOT NULL,
`number` int(4) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO passing_number VALUES
(1,4),
(2,3),
(3,3),
(4,2),
(6,4),
(5,5);
输出:
5|5|1
1|4|2
6|4|2
2|3|3
3|3|3
4|2|4
知识点:窗口函数dense_rank
select *,dense_rank() over(order by number desc) t_rank
from passing_number;
SQL64 找到每个人的任务
示例1
输入:
drop table if exists person;
drop table if exists task;
CREATE TABLE `person` (
`id` int(4) NOT NULL,
`name` varchar(32) NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `task` (
`id` int(4) NOT NULL,
`person_id` int(4) NOT NULL,
`content` varchar(32) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO person VALUES
(1,'fh'),
(2,'tm');
INSERT INTO task VALUES
(1,2,'tm works well'),
(2,2,'tm works well');
输出:
1|fh|None
2|tm|tm works well
2|tm|tm works well
知识点:左连接left join
select
p.*,
t.content
from
person p
left join task t on p.id = t.person_id
order by
p.id;
SQL65 异常的邮件概率
示例1
输入:
drop table if exists email;
drop table if exists user;
CREATE TABLE `email` (
`id` int(4) NOT NULL,
`send_id` int(4) NOT NULL,
`receive_id` int(4) NOT NULL,
`type` varchar(32) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `user` (
`id` int(4) NOT NULL,
`is_blacklist` int(4) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO email VALUES
(1,2,3,'completed','2020-01-11'),
(2,1,3,'completed','2020-01-11'),
(3,1,4,'no_completed','2020-01-11'),
(4,3,1,'completed','2020-01-12'),
(5,3,4,'completed','2020-01-12'),
(6,4,1,'completed','2020-01-12');
INSERT INTO user VALUES
(1,0),
(2,1),
(3,0),
(4,0);
输出:
2020-01-11|0.500
2020-01-12|0.000
知识点:round函数
- 两次关联
user
表,从而让发送者身份和接收者身份能出现在同一条记录中,然后筛选出发送者和接收者都是正常用户的记录 - 按照date进行分组
sum(e.type = "no_completed")/count(1)
不能写成count(e.type = “no_completed”)/count(1),因为sum
会对e.type = "no_completed"
的结果(0或1)
进行求和,但是count
则是拿到e.type
的总行数,不符合预期
select e.date,round(sum(e.type = "no_completed")/count(1),3) as p
from email e
join user u1 on e.send_id = u1.id
join user u2 on e.receive_id = u2.id
where u1.is_blacklist = 0 and u2.is_blacklist = 0
group by e.date
order by e.date;
SQL66 牛客每个人最近的登录日期(一)
示例1
输入:
drop table if exists login;
drop table if exists user;
drop table if exists client;
CREATE TABLE login (
id int(4) NOT NULL,
user_id int(4) NOT NULL,
client_id int(4) NOT NULL,
date date NOT NULL,
PRIMARY KEY (id));
CREATE TABLE user (
id int(4) NOT NULL,
name varchar(32) NOT NULL,
PRIMARY KEY (id));
CREATE TABLE client (
id int(4) NOT NULL,
name varchar(32) NOT NULL,
PRIMARY KEY (id));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,2,2,'2020-10-13'),
(4,3,2,'2020-10-13');
INSERT INTO user VALUES
(1,'tm'),
(2,'fh'),
(3,'wangchao');
INSERT INTO client VALUES
(1,'pc'),
(2,'ios'),
(3,'anroid'),
(4,'h5');
输出:
2|2020-10-13
3|2020-10-13
知识点:聚合函数max
select
user_id,
max(date)
from
login
group by
user_id
order by
user_id;
SQL67 牛客每个人最近的登录日期(二)
示例1
输入:
drop table if exists login;
drop table if exists user;
drop table if exists client;
CREATE TABLE `login` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`client_id` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `user` (
`id` int(4) NOT NULL,
`name` varchar(32) NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `client` (
`id` int(4) NOT NULL,
`name` varchar(32) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,2,2,'2020-10-13'),
(4,3,2,'2020-10-13');
INSERT INTO user VALUES
(1,'tm'),
(2,'fh'),
(3,'wangchao');
INSERT INTO client VALUES
(1,'pc'),
(2,'ios'),
(3,'anroid'),
(4,'h5');
输出:
fh|ios|2020-10-13
wangchao|ios|2020-10-13
知识点:关联查询,子查询,窗口函数rank
- 先通过关联查询将我们需要的字段放到同一个表中(解答中最内层子查询)
- 通过
rank
窗口函数对user_name
进行分组排序(解答中第二层子查询) - 筛选出每组中日期最大的,然后对最终的结果按照
user_name
排序
select
u_n,
c_n,
date
from
(
select
u_n,
c_n,
date,
rank() over(
partition by u_n
order by
date desc
) d_rank
from
(
select
u.name u_n,
c.name c_n,
l.date
from
login l
join user u on l.user_id = u.id
join client c on l.client_id = c.id
) t
) t2
where
d_rank = 1
order by u_n;
SQL68 牛客每个人最近的登录日期(三)
示例1
输入:
drop table if exists login;
CREATE TABLE `login` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`client_id` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,1,2,'2020-10-12'),
(4,2,2,'2020-10-13'),
(5,4,1,'2020-10-13'),
(6,1,2,'2020-10-13'),
(7,1,2,'2020-10-14');
输出:
0.500
知识点:新用户次日成功的留存率
count不统计为null的记录
- 查询用户的
user_id
和首次登陆日期,确认是新登录用户。
select user_id,min(date) first_date from login group by user_id
- 次日成功的留存率,即第
1
天登陆之后,第2
天再次登陆的概率
在1
中的表后连接第2
天的登陆时间。如果第2
天登陆了,l2.date
即为第二天的日期,如果第2
天没有登陆,l2.date
即为null
。根据l2.date
是否为null
,来判断第2
天是否登陆。
则第2
天再次登陆的次数为count(l2.date)
,第1
天登陆的新用户数为count(*)
,次日成功留存率为count(l2.date)/count(*)
- 保存小数点后面
3
位(3
位之后的四舍五入),使用round()
select round(count(l2.date)/count(*),3)
from (select user_id,min(date) first_date from login group by user_id) l1
left join login l2 on l1.user_id = l2.user_id and l2.date = date_add(l1.first_date,interval 1 day);
SQL69 牛客每个人最近的登录日期(四)
示例1
输入:
drop table if exists login;
CREATE TABLE `login` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`client_id` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,1,2,'2020-10-12'),
(4,2,2,'2020-10-13'),
(5,1,2,'2020-10-13'),
(6,3,1,'2020-10-14'),
(7,4,1,'2020-10-14'),
(8,4,1,'2020-10-15');
输出:
2020-10-12|3
2020-10-13|0
2020-10-14|1
2020-10-15|0
知识点:每日新增用户个数
- 先得到所有日期表
select distinct date from login
; - 然后左连接新用户首次登陆的日期表
(select user_id, min(date) first_date from login group by user_id)
; - 归类统计日期出现的次数.
select a.date,count(b.user_id)
from (select distinct date from login) a
left join (select user_id ,min(date) first_date from login group by user_id) b
on a.date = b.first_date
group by a.date
order by a.date;
SQL70 牛客每个人最近的登录日期(五)
示例1
输入:
drop table if exists login;
CREATE TABLE `login` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`client_id` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,1,2,'2020-10-12'),
(4,2,2,'2020-10-13'),
(5,1,2,'2020-10-13'),
(6,3,1,'2020-10-14'),
(7,4,1,'2020-10-14'),
(8,4,1,'2020-10-15');
输出:
2020-10-12|0.667
2020-10-13|0.000
2020-10-14|1.000
2020-10-15|0.000
求出新用户第一天登录日期
SELECT user_id,MIN(date) AS date FROM login GROUP BY user_id
算出有新用户登录的日期的新用户留存率
SELECT a.date,ROUND(COUNT(DISTINCT login.user_id)/ COUNT(a.user_id),3) AS p
FROM (SELECT user_id,MIN(date) AS date FROM login GROUP BY user_id) AS a
LEFT JOIN login
ON login.user_id=a.user_id
AND login.date=DATE_ADD(a.date,INTERVAL 1 DAY)
GROUP BY a.date
用UNION补上没有新用户登录的日期
SELECT a.date,ROUND(COUNT(DISTINCT login.user_id)/ COUNT(a.user_id),3) AS p
FROM (SELECT user_id,MIN(date) AS date FROM login GROUP BY user_id) AS a
LEFT JOIN login
ON login.user_id=a.user_id
AND login.date=DATE_ADD(a.date,INTERVAL 1 DAY)
GROUP BY a.date
UNION
SELECT date,0.000 AS p
FROM login
WHERE date NOT IN(
SELECT MIN(date) FROM login GROUP BY user_id)
ORDER BY date;
SQL71 牛客每个人最近的登录日期(六)
示例1
输入:
drop table if exists login;
drop table if exists passing_number;
drop table if exists user;
drop table if exists client;
CREATE TABLE `login` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`client_id` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `passing_number` (
`id` int(4) NOT NULL,
`user_id` int(4) NOT NULL,
`number` int(4) NOT NULL,
`date` date NOT NULL,
PRIMARY KEY (`id`));
CREATE TABLE `user` (
`id` int(4) NOT NULL,
`name` varchar(32) NOT NULL,
PRIMARY KEY (`id`));
INSERT INTO login VALUES
(1,2,1,'2020-10-12'),
(2,3,2,'2020-10-12'),
(3,1,2,'2020-10-12'),
(4,1,3,'2020-10-13'),
(5,3,2,'2020-10-13');
INSERT INTO passing_number VALUES
(1,2,4,'2020-10-12'),
(2,3,1,'2020-10-12'),
(3,1,0,'2020-10-13'),
(4,3,2,'2020-10-13');
INSERT INTO user VALUES
(1,'tm'),
(2,'fh'),
(3,'wangchao');
输出:
fh|2020-10-12|4
wangchao|2020-10-12|1
tm|2020-10-13|0
wangchao|2020-10-13|3
明确问题:
- 登录不一定刷题,刷题不一定通过。刷题了就会存在刷题表里。
- 三张表:登录表、刷题表、用户表。
- 因为有登录没有刷题的数据不需要输出,所以截止到某天指的是刷题表有记录的日期。没有用到登录表。
- 用窗口函数按姓名分区按日期排序计算累计数。
select u.name as u_n
,p.date as date
,sum(p.number)over(partition by u.name order by date)as ps_num
from passing_number p,user u
where p.user_id=u.id
order by p.date,u.name;