刷题链接地址:https://www.nowcoder.com/exam/oj?page=1&tab=SQL篇&topicId=199
学习文章:通俗易懂的学会 SQL 窗口函数
1.基础查询
1.1.基础查询
1.1.1.查询所有列
select * from user_profile;
1.1.2.查询多列
select device_id, gender, age, university from user_profile;
1.2.简单处理查询结果
1.2.1.查询结果去重
select distinct university from user_profile;
1.2.2.查询结果限制返回行数
select device_id from user_profile limit 2;
1.2.3.将查询后的列重新命名
select device_id as user_infos_example from user_profile limit 2;
2.条件查询
2.1.基础排序
2.1.1.查找后排序
select device_id, age from user_profile order by age asc;
2.1.2.查找后多列排序
select device_id, gpa, age from user_profile order by gpa asc, age asc;
2.1.3.查找后降序排列
select device_id, gpa, age from user_profile order by gpa desc, age desc;
2.2.基础操作符
2.2.1.查找学校是 XXX 的学生信息
select device_id, university from user_profile where university = "XXX";
2.2.2.查找年龄大于 24 岁的用户信息
select device_id, gender, age, university from user_profile where age >24;
2.2.3.查找某个年龄段的用户信息
select device_id, gender, age from user_profile where age between 20 and 23;
2.2.4.查找除XX学校 的用户信息
select device_id, gender, age, university from user_profile where university <> "XX学校";
2.2.5.用 where 过滤空值练习
select device_id, gender, age, university from user_profile where age is not NULL;
2.3.高级操作符
2.3.1.高级操作符(练习一)
select device_id, gender, age, university, gpa from user_profile
where gpa>3.5 and gender="male";
2.3.2.高级操作符(练习二)
要求:使用 OR 实现
select device_id, gender, age, university, gpa from user_profile
where university="北京大学" or gpa > 3.7;
2.3.3.Where in 和 Not in
- IN
select device_id, gender, age, university, gpa from user_profile
where university in("北京大学", "复旦大学", "山东大学");
- NOT IN
select device_id, gender, age, university, gpa from user_profile
where university not in("浙江大学","清华大学","南京大学","中山大学");
2.3.4.操作符的混合运用
select device_id, gender, age, university, gpa from user_profile
where (gpa>3.5 and university="山东大学") or (gpa>3.8 and university="复旦大学");
- 也可以使用子查询
select device_id, gender, age, university, gpa from user_profile
where
device_id in (select device_id from user_profile where gpa>3.5 and university='山东大学')
or
device_id in (select device_id from user_profile where gpa>3.8 and university='复旦大学');
2.3.5.查看学校名称中含北京的用户
select device_id, age, university from user_profile where university like "%北京%";
3.高级查询
3.1.计算函数
3.1.1.查找 GPA 最高值
select max(gpa) as gpa from user_profile where university="复旦大学";
- 也可以使用 limit
select gpa from user_profile where university="复旦大学" order by gpa desc limit 1;
3.1.2.计算男生人数以及平均 GPA
select count(gender) as male_num, round(avg(gpa), 1) as avg_gpa from user_profile where gender="male";
3.2.分组查询
3.2.1.分组计算练习题
select
gender, university,
count(device_id) as user_num,
avg(active_days_within_30) as avg_active_days,
avg(question_cnt) as avg_question_cnt
from
user_profile
group by
gender, university;
3.2.2.分组过滤练习题
select
university,
avg(question_cnt) as avg_question_cnt,
avg(answer_cnt) as avg_answer_cnt
from
user_profile
group by
university
having
avg(question_cnt) < 5
or avg(answer_cnt) < 20;
3.2.3.分组排序练习题
select
university,
avg(question_cnt) as avg_question_cnt
from
user_profile
group by
university
order by
avg_question_cnt;
4.多表查询
4.1.子查询
查看 XX大学 用户题目回答情况
- 显式内连接
select
q.device_id, q.question_id, q.result
from
question_practice_detail as q
inner join user_profile as u
on q.device_id = u.device_id and university = "浙江大学"
order by
q.question_id;
- 表子查询
select
device_id, question_id, result
from
question_practice_detail
where
device_id in (
select device_id from user_profile where university = "浙江大学"
)
order by
question_id;
4.2.链接查询
4.2.1.统计每个学校的答过题的用户的平均答题数
select
u.university,
truncate((count(q.question_id) / count(distinct q.device_id)), 4) as avg_answer_cnt
from
user_profile as u
inner join question_practice_detail as q
on u.device_id = q.device_id
group by
university
order by
university asc;
4.2.2.统计每个学校各难度的用户平均刷题数
select
u.university,
qd.difficult_level,
round(
(count(qpd.question_id) / count(distinct qpd.device_id)), 4
) as avg_answer_cnt
from
question_practice_detail as qpd
left join user_profile as u on u.device_id = qpd.device_id
inner join question_detail as qd on qpd.question_id = qd.question_id
group by
u.university, qd.difficult_level;
4.2.3.统计每个用户的平均刷题数
select
u.university,
qd.difficult_level,
round(
(count(qpd.question_id) / count(distinct qpd.device_id)), 4
) as avg_answer_cnt
from
question_practice_detail as qpd
left join user_profile as u on u.device_id = qpd.device_id
inner join question_detail as qd on qpd.question_id = qd.question_id
where university="山东大学"
group by qd.difficult_level;
4.3.组合查询
查找XX大学或者性别为男生的信息
select device_id, gender, age, gpa from user_profile where university="山东大学"
union all
select device_id, gender, age, gpa from user_profile where gender='male';
- 核心在于不去重:union all
5.必会的常用函数
5.1.条件函数
5.1.1.计算 25 岁以上和以下的用户数量
SELECT
CASE
WHEN age < 25 OR age IS NULL THEN "25岁以下"
WHEN age >= 25 THEN "25岁及以上"
END as age_cut,
COUNT(*) as number
FROM user_profile
GROUP BY age_cut;
5.1.2.查看不同年龄段的用户明细
select
device_id, gender,
case
when age >= 25 then '25岁及以上'
when age >= 20 then '20-24岁'
when age < 20 then '20岁以下'
else '其他'
end as age_cut
from
user_profile;
5.2.日期函数
5.2.1.计算用户 2021 年 8 月每天的练题数量
select
day(date) as day,
count(question_id) as question_cnt
from question_practice_detail
where month(date)=8 and year(date)=2021
group by date;;
5.2.2.计算用户的平均次日留存率
查看用户在某天刷题后第二天还会再来刷题的平均概率
SELECT
COUNT(qpd2.device_id) / COUNT(qpd1.device_id) AS avg_ret
FROM
(SELECT DISTINCT device_id, date FROM question_practice_detail) AS qpd1
LEFT JOIN
(SELECT DISTINCT device_id, date FROM question_practice_detail) AS qpd2
ON qpd1.device_id = qpd2.device_id
AND qpd2.date = DATE_ADD(qpd1.date, INTERVAL 1 DAY);
5.3.文本函数
5.3.1.统计每种性别的人数
- substring_index
SELECT
SUBSTRING_INDEX(profile, "," , -1) AS gender,
COUNT(device_id) AS number
FROM user_submit
GROUP BY gender;
- if + like
SELECT
IF (profile LIKE '%female', 'female', 'male') gender,
COUNT(*) number
FROM user_submit
GROUP BY gender;
5.3.2.提取博客 URL 中的用户名
- 替换
select
device_id,
replace(blog_url, "http:/url/", "") as user_name
from user_submit;
- 截取
select
device_id,
substr(blog_url, 11, length(blog_url)-10) as user_name
from user_submit;
- 删除
select
device_id,
trim('http:/url/' from blog_url) as user_name
from user_submit;
- 字段切割
select
device_id,
substring_index(blog_url, "/", -1) as user_name
from user_submit;
5.3.3.截取出年龄
select
substring_index(substring_index(profile, ',', 3), ',', -1) as age,
count(device_id) as number
from user_submit
group by age
5.4.窗口函数
找出每个学校 GPA 最低的同学
- 窗口函数
select
device_id, university, gpa
from
(
select
*,
row_number() over (partition by university order by gpa) as rn
from user_profile
) as univ_min
where rn = 1
order by university;
参考文章:通俗易懂的学会:SQL 窗口函数
窗口函数的基本语法
<窗口函数> over (partition by <用于分组的列名> order by <用于排序的列名>)
<窗口函数>
的位置,可以放以下两种函数:
- 专用窗口函数。如 rank,dense_rank,row_number 等专用窗口函数。
- 聚合函数。如 sum,avg,count,max,min 等
- 因为窗口函数是对 where 或者 group by 子句处理后的结果进行操作,所以窗口函数原则上只能写在 select 子句中。
- 右连接 + group by
select a.device_id, a.university, a.gpa
from
user_profile as a
right join
(select university, min(gpa) as gpa from user_profile group by university) as b
on a.university=b.university and a.gpa=b.gpa
order by university;
6.综合练习
6.1.统计XX学校用户 8 月练题情况
SELECT
up.device_id,
"复旦大学" AS university,
count(question_id) AS question_cnt,
SUM(if(qpd.result='right', 1, 0)) AS right_question_cnt
FROM
user_profile AS up
LEFT JOIN
question_practice_detail AS qpd
ON
qpd.device_id = up.device_id AND month(qpd.date) = 8
WHERE
up.university = '复旦大学'
GROUP BY up.device_id;
6.2.XX学校的不同难度题目的正确率
SELECT
qd.difficult_level,
(SUM(if(qpd.result="right", 1, 0)) / count(qpd.question_id)) AS correct_rate
FROM
user_profile AS up
INNER JOIN
question_practice_detail AS qpd
ON
up.device_id = qpd.device_id
INNER JOIN
question_detail AS qd
ON
qpd.question_id = qd.question_id
WHERE up.university = '浙江大学'
GROUP BY qd.difficult_level
ORDER BY correct_rate ASC;
6.3.2021 年 8 月份练题总数
SELECT
COUNT(distinct device_id) AS did_cnt,
COUNT(question_id) AS question_cnt
FROM
question_practice_detail
WHERE
YEAR(date)=2021 AND MONTH(date)=8;