练习链接:
https://www.nowcoder.com/exam/oj?page=1&tab=SQL%E7%AF%87&topicId=240
牛客SQL 256套代码及解析:
https://blog.csdn.net/qq118640594X/article/details/131466462?spm=1001.2014.3001.5501
1.增删改操作
插入记录
SQL110 插入记录(一)
(1. 插入:insert into 表名 values (第一行记录),(第二行记录)… ;
2.计算增加后的时间: date_add(被增加的时间,interval 增加多少时间);)
INSERT into exam_record values
(DEFAULT,1001,9001,'2021-09-01 22:11:12',date_add('2021-09-01 22:11:12',interval 50 minute) ,90),
(DEFAULT,1002,9002,'2021-09-04 07:01:02',DEFAULT ,DEFAULT)
SQL111 插入记录(二)
(1.INSERT INTO table2(id, name, address) SELECT id, name, address FROM table1 ;
2.如果简写成INSERT INTO table2 SELECT id, name, address FROM table1形式,需要SELECT后面的列的顺序 必须和目标表中的列的定义顺序完全一致 才能完成正确的数据插入)
insert into exam_record_before_2021(uid,exam_id,start_time,submit_time,score)
select uid,exam_id,start_time,submit_time,score
from exam_record
where submit_time <'2021-01-01'
SQL112 插入记录(三)
(1.第二行字符串要加引号;
2.关键字NULL可以用DEFAULT替代;
3. replace into 表名 values (记录);
4. REPLACE INTO 跟 INSERT 功能类似。不同点在于:REPLACE INTO 首先尝试插入数据到表中, 如果发现表中已经有此行数据(根据主键或者唯一索引判断)则先删除此行数据,然后插入新的数据;否则,直接插入新数据。要注意的是:插入数据的表必须有主键或者是唯一索引!否则的话,REPLACE INTO 会直接插入数据,这将导致表中出现重复的数据。)
replace into examination_info values
(null,9003,'SQL','hard',90,'2021-01-01 00:00:00')
更新记录
SQL113 更新记录(一)
(1. UPDATE table_name SET column1 = value1, column2 = value2, … WHERE condition;
2.replace与update的区别:replace对一整条记录的值做修改 update可以只修改某个或者某些记录的值)
update examination_info
set tag='Python'
where tag='PYTHON'
SQL114 更新记录(二)
update exam_record
set submit_time='2099-01-01 00:00:00',score=0
where start_time<'2021-09-01' and submit_time is null
删除记录
SQL115 删除记录(一)
(1.时间差函数 timestampdiff(unit, begin,end),其中unit是(end-begin)的结果的单位,表示为整数。 以下是有效单位:MICROSECOND、SECOND、MINUTE、HOUR、DAY、WEEK、MONTH ;
2. 根据条件删除:DELETE FROM tb_name [where options ] [ [ ORDER BY fields ] LIMIT n ]
全部删除(表清空,包含自增计数器重置):TRUNCATE tb_name)
delete from exam_record
where timestampdiff(minute,start_time,submit_time)<5
and score<60
SQL116 删除记录(二)
delete from exam_record
where submit_time is null or
timestampdiff(minute,start_time,submit_time) <5
order by start_time
limit 3
SQL117 删除记录(三)
(DROP TABLE, TRUNCATE TABLE, DELETE TABLE 三种删除语句的区别:
1.DROP TABLE 清除数据并且销毁表,是一种数据库定义语言(DDL Data Definition Language), 执行后不能撤销,被删除表格的关系,索引,权限等等都会被永久删除。
2.TRUNCATE TABLE 只清除数据,保留表结构,列,权限,索引,视图,关系等等,相当于清零数据,是一种数据库定义语言(DDL Data Definition Language),执行后不能撤销。
3.DELETE TABLE 删除(符合某些条件的)数据,是一种数据操纵语言(DML Data Manipulation Language),执行后可以撤销。
运行速度一般DROP最快,DELETE最慢,但是DELETE最安全。)
truncate exam_record
2.表与索引操作
表的创建、修改与删除
SQL118 创建一张新表
(1. create table 表名(… , … , …)default charset=UTF8 )
create table user_info_vip(
id int(11) primary key auto_increment comment "自增ID",
uid int(11) unique not null comment "用户ID",
nick_name varchar(64) comment "昵称",
achievement int(11) default 0 comment "成就值",
level int(11) comment "用户等级",
job varchar(32) comment "职业方向",
register_time datetime default CURRENT_TIMESTAMP comment "注册时间"
) default CHARSET=UTF8;
SQL119 修改表
(1.alter table 表名 add/change/modify …;
2.add 增加列的名称 数据类型;
3.change 原列名 修改列名 修改数据类型;
4. modify 修改列名称 数据类型 默认值;)
ALTER TABLE user_info ADD school varchar(15) AFTER level;
ALTER TABLE user_info CHANGE job profession VARCHAR(10);
alter table user_info modify achievement int(11) DEFAULT 0 ;
SQL120 删除表
(1.删除表:DROP TABLE [IF EXISTS] 表名1 [, 表名2])
DROP TABLE IF EXISTS
exam_record_2011,
exam_record_2012,
exam_record_2013,
exam_record_2014;
索引的创建、删除
(* 1.索引使用时满足最左前缀匹配原则,即对于组合索引(col1, col2),在不考虑引擎优化时,条件必须是col1在前col2在后,或者只使用col1,索引才会生效;
2.索引不包含有NULL值的列
3.一个查询只使用一次索引,where中如果使用了索引,order by就不会使用
like做字段比较时只有前缀确定时才会使用索引
4.在列上进行运算后不会使用索引,如year(start_time)<2020不会使用start_time上的索引*)
SQL121 创建索引
(1. create index 索引名 ON 表名 – 创建索引;
2. alter table 表名 add [index] 索引名 --alter方式创建索引 ;
3. unique – 唯一索引 , fulltext – 全文索引 )
create index idx_duration on examination_info(duration);
create unique index uniq_idx_exam_id on examination_info(exam_id);
create fulltext index full_idx_tag on examination_info(tag);
SQL122 删除索引
(1.drop方式删除索引:DROP INDEX <索引名> ON <表名> ;
2.alter方式删除索引:ALTER TABLE <表名> DROP INDEX <索引名> )
drop index uniq_idx_exam_id on examination_info;
drop index full_idx_tag on examination_info;
3.聚和分组查询
聚和函数
SQL123 SQL类别高难度试卷得分的截断平均值
select tag,difficulty,
round((sum(score)-max(score)-min(score))/(count(score)-2),1) as clip_avg_score
from exam_record
join examination_info using(exam_id)
where tag='SQL' and difficulty='hard' and score is not null
group by tag,difficulty
SQL124 统计作答次数
select
count(start_time) as total_pv,
count(submit_time) as complete_pv,
count(distinct if(score is not null,exam_id,null)) as complete_exam_cnt
from exam_record
SQL125 得分不小于平均分的最低分
select min(score) as min_score_over_avg
from exam_record
join examination_info using(exam_id)
where tag='SQL' and score>= (select avg(score)
from exam_record
join examination_info using(exam_id)
where tag='SQL')
分组查询
SQL126 平均活跃天数和月活人数
(在MySQL中, count(distinct A,B)是可以的, count(A,B) 报错 ; )
select
date_format(start_time,'%Y%m') as month,
round(count(distinct uid,date_format(start_time,'%Y%m%d') )/count(distinct uid),2)as avg_active_days,
count(distinct uid) as mau
from exam_record
where year(start_time)=2021 and submit_time is not null
group by month
SQL127 月总刷题数和日均刷题数
(1.第三行需要加avg,因为按照月份进行分组之后day(LAST_DAY())是一组数,而不是一个数,所以需要用avg将他转换成一个具体的数;
2.last_day()可以用来返回参数日期的最后一天 ;)
select DATE_FORMAT(submit_time,'%Y%m') submit_month,
count(question_id) month_q_cnt,
round(count(question_id)/avg(day(last_day(submit_time))),3) avg_day_q_cnt
from practice_record
where DATE_FORMAT(submit_time,'%Y')='2021'
group by submit_month
union ALL
SELECT '2021汇总' as submit_month,
count(question_id) month_q_cnt,
round(count(submit_time)/31,3) avg_day_q_cnt
from practice_record
where DATE_FORMAT(submit_time,'%Y')='2021'
order by submit_month
SQL128 未完成试卷数大于一的有效用户
(1. 对于每条作答tag,用 : 连接日期和tag:concat_ws(‘:’, date(start_time), tag) ;
2. group_concat中默认用 逗号 作分隔符,如有需要,可以用 seperator修改分隔符, ;
3. 对于一个人(组内)的多条作答,用 ; 连接去重后的作答记录:group_concat(distinct concat_ws(‘:’, date(start_time), tag) SEPARATOR ‘;’) )
select uid,
sum(if(submit_time is null,1,0)) as incomplete_cnt,
sum(if(submit_time is null,0,1)) as complete_cnt,
group_concat(distinct concat_ws(':', date(start_time), tag) SEPARATOR ';') as detail
from examination_info
right join exam_record using(exam_id)
where year(start_time)=2021
group by uid
having (incomplete_cnt between 2 and 4) and complete_cnt>=1
order by incomplete_cnt desc
4.多表查询
嵌套子查询
SQL129 月均完成试卷数不小于3的用户爱作答的类别
select tag,count(start_time) as tag_cnt
from examination_info
join exam_record using(exam_id)
where uid in (
select uid
from exam_record
group by uid,month(submit_time)
having count(score)>=3
)
group by tag
order by tag_cnt desc
SQL130 试卷发布当天作答人数和平均分
select exam_id,
count(distinct uid) as uv,
round(avg(score),1) as avg_score
from exam_record
where uid in (
select uid
from user_info
where `level`>5 )
and
(exam_id, DATE(start_time)) IN (
SELECT exam_id, DATE(release_time)
FROM examination_info WHERE tag = "SQL")
group by exam_id
order by uv desc,avg_score
SQL131 作答试卷得分大于过80的人的用户等级分布
select level,
count(uid) as level_cnt
from user_info
where uid in (
select uid
from exam_record
where score>80 and exam_id in(
select exam_id
from examination_info
where tag='SQL'
)
)
group by level
order by level_cnt desc
合并查询
SQL132 每个题目和每份试卷被作答的人数和次数
(1.left(tid, 1)表示从tid列中取出从左数的第一个字符(即8001中的8,9001中的9),并进行降序排序 ; )
select exam_id as tid,
count(distinct uid) as uv,
count(start_time) as pv
from exam_record
group by exam_id
union all
select question_id as tid,
count(distinct uid) as uv,
count(submit_time) as pv
from practice_record
group by question_id
order by LEFT(tid,1) desc,uv desc, pv desc
SQL133 分别满足两个活动的人
select uid,
'activity1' as activity
from exam_record
join examination_info using(exam_id)
where year(start_time)=2021 and score>=85
group by uid
union all
select uid,
'activity2' as activity
from exam_record
join examination_info using(exam_id)
where year(start_time)=2021
and difficulty='hard'
and score>80
and timestampdiff(second,start_time,submit_time)<= duration*30
order by uid
连接查询
SQL134 满足条件的用户的试卷完成数和题目练习数
(1.question_cnt这里用到if函数 if(question_cnt is null, 0, question_cnt) 是因为:题目中提到 “1003完成了2次试卷9001、9002,未练习题目(因此计数为0)” ,所以答案要null用0表示;
2.本题注重结构性思维,分块求出exam_cnt,question_cnt, 设置条件,并将这几块段落连接)
select
uid,
exam_cnt,
if(question_cnt is null, 0, question_cnt)
from
(select
uid,
count(submit_time) as exam_cnt
from exam_record
where YEAR(submit_time) = 2021
group by uid) t
left join
(select
uid,
count(submit_time) as question_cnt
from practice_record
where YEAR(submit_time) = 2021
group by uid) t2 using(uid)
where uid in
(
select
uid
from exam_record
join examination_info using(exam_id)
join user_info using(uid)
where tag = 'SQL' and difficulty = 'hard' and `level` = 7
group by uid
having avg(score) >= 80
)
order by exam_cnt asc, question_cnt desc
SQL135 每6/7级用户活跃情况
select u_i.uid as uid,
count(distinct act_month) as act_month_total,
count(distinct case
when year(act_time) = 2021
then act_day
end) as act_days_2021,
count(distinct case
when year(act_time) = 2021
and tag = 'exam'
then act_day
end) as act_days_2021_exam,
count(distinct case
when year(act_time) = 2021
and tag = 'question'
then act_day
end) as act_days_2021_question
from user_info u_i
left join (select uid,
start_time as act_time,
date_format(start_time, '%Y%m') as act_month,
date_format(start_time, '%Y%m%d') as act_day,
'exam' as tag
from exam_record
union all
select uid,
submit_time as act_time,
date_format(submit_time, '%Y%m') as act_month,
date_format(submit_time, '%Y%m%d') as act_day,
'question' as tag
from practice_record
) exam_and_practice
on exam_and_practice.uid = u_i.uid
where u_i.level >= 6
group by uid
order by act_month_total desc, act_days_2021 desc
5.窗口函数
专用窗口函数
SQL136 每类试卷得分前3名
(1. rank() over() 1 2 2 4 4 6 (计数排名,跳过相同的几个,没有3没有5;)
2. row_number() over() 1 2 3 4 5 6 (赋予唯一排名);
3. dense_rank() over() 1 2 2 3 3 4 (不跳过排名,可以理解为对类别进行计数); )
select tag as tid,uid,ranking
from (select
tag,
uid,
dense_rank()over (partition by tag order by max(score) desc,min(score) desc,uid desc) as ranking
from examination_info
left join exam_record using(exam_id)
group by tag,uid) as a
where ranking<=3
SQL137 第二块/慢用时之差大于试卷时长一半的试卷
(1. nth_value(字段名,n)over(partition by…order by…)是窗口函数,表示允许从有序行集中的第N行获取值)
select exam_id,duration,release_time
from(
select exam_id,duration, release_time,
nth_value(timestampdiff(minute,start_time,submit_time),2)
over(partition by exam_id order by timestampdiff(minute,start_time,submit_time) desc) as max_time,
nth_value(timestampdiff(minute,start_time,submit_time),2)
over(partition by exam_id order by timestampdiff(minute,start_time,submit_time) asc) as min_time
from exam_record
join examination_info using(exam_id)
) as a
where max_time-min_time>duration*0.5
group by exam_id
order by exam_id desc
SQL138 连续两次作答试卷的最大时间窗
(lead的窗口函数用法:
- lead(字段名,n) over () :取值向后偏移n行(空间的理解就是直接将一列数据往前推n个位置,后面的位置就空出来了,具体配合图片理解);
- lag(字段名,n) over () :取值向前偏移n行(空间的理解就是直接将一列数据往前后n个位置,前面的位置就空出来了,具体配合图片理解);
- lag(字段名,n,x) over () :取值向前偏移n行,并将空值填充为数字x(空间的理解就是直接将一列数据往前后n个位置,前面的空出来的位置用X填充上,具体配合图片理解*)
时间函数: - datediff(时间1,时间2):计算两个日期之间间隔的天数,单位为日
- timestampdiff(时间单位,开始时间,结束时间):两个日期的时间差,返回的时间差形式由时间单位决定(日,周,月,年)
- date_add(日期,INTERVAL n 时间单位) :返回加上n个时间单位后的日期
- date_sub(日期,INTERVAL n 时间单位 ):返回减去n个时间单位后的日期
- date_format(时间,‘%Y-%m-%d’):强制转换时间为所需要的格式 )
- datediff 与 timestampdiff 的区别:
如9月5号12点,到9月6号的11点,中间隔了23个小时,timestampdiff会从秒分时天的角度来计算,返回的结果是0天,而datediff只会根据9月5号和9月6号的天的时间计算,返回的结果是1天。
SELECT uid, days_window, round(days_window*exam_cnt/diff_days, 2) as avg_exam_cnt
FROM (
SELECT uid,
count(start_time) as exam_cnt, -- 此人作答的总试卷数
DATEDIFF(max(start_time), min(start_time))+1 as diff_days, -- 最早一次作答和最晚一次作答的相差天数
max(DATEDIFF(next_start_time, start_time))+1 as days_window -- 两次作答的最大时间窗
FROM (
SELECT uid, exam_id, start_time,
lead(start_time) over(partition by uid ORDER BY start_time) as next_start_time -- 将连续的下次作答时间拼上
FROM exam_record
WHERE year(start_time)=2021
) as t_exam_record_lead
GROUP BY uid
) as t_exam_record_stat
WHERE diff_days>1
ORDER BY days_window DESC, avg_exam_cnt DESC
SQL139 近三个月未完成试卷数为0的用户完成情况
(1.为什么不能在子查询里用where score is not null筛选空的记录
在子查询中先where筛选会将用户的某条未完成记录去掉,而当月它有完成记录,也有未完成记录时,在外面的查询中会统一他认为它没有未完成记录。所以会多出结果(题目要求是‘近三个有试卷作答记录的月份中没有试卷是未完成状态的用户’);)
select uid, count(start_time) as exam_complete_cnt
from(
select *,
dense_rank() over(partition by uid order by date_format(start_time,'%Y%m') desc) as rk
from exam_record
) as a
where rk<=3
group by uid
having count(score)=count(uid)
order by exam_complete_cnt desc, uid desc
SQL140 未完成率较高的50%用户近三个月答卷情况
(具体可参考:https://blog.nowcoder.net/n/29bdcc42caf1495a83bb91222977f029?f=comment)
select uid,
date_format(start_time,'%Y%m') as start_month,
count(start_time) as total_cnt,
count(submit_time) as complete_cnt
from (
select *,
dense_rank()over(partition by uid order by date_format(start_time,'%Y%m') desc) as rk1
from exam_record ) a
right join (
select *
from (
select uid,
percent_rank()over(order by count(submit_time)/count(start_time)) as rk2
FROM exam_record
where exam_id in (
select exam_id
from examination_info
where tag='SQL')
group by uid) b
where rk2<=0.5
and uid in (
select uid
from user_info
where `level` in (6,7))) c
using (uid)
where rk1<=3
group by uid,start_month
order by uid,start_month
SQL141 试卷完成数同比2020你那的增长率及排名变化
(1. SQL一般默认都是unsigned, 是不能出现负数的,一旦出现负数就可能报错,可用cast(字段 as signed)即可
2. rank1和rank2属于排序, 应该是不会出现负数的, 但是计算排名升降的时候,会有出现降排名, 也就是负数, 而数据库默认是unsigned的所以需要转化一下. 而growth_rate在用concat拼接之后已经是字符串格式了, 所以不需要转换)
SELECT #第三步:取出所有需要的字段,并计算排名变化(2021-2020年,无需取绝对值)
tag,
exam_cnt_20,
exam_cnt_21,
growth_rate,
exam_cnt_rank_20,
exam_cnt_rank_21,
CAST(exam_cnt_rank_21 as SIGNED) - CAST(exam_cnt_rank_20 AS SIGNED) rank_delta
FROM (
SELECT #第二步:计算growth_rate,并生成各tag完成数排名
tag,
exam_cnt_20,
exam_cnt_21,
CONCAT(ROUND((exam_cnt_21 - exam_cnt_20)* 100 / exam_cnt_20 , 1), '%') growth_rate,
RANK() OVER (ORDER BY exam_cnt_20 DESC) exam_cnt_rank_20,
RANK() OVER (ORDER BY exam_cnt_21 DESC) exam_cnt_rank_21
FROM (
SELECT #第一步:找到所有tag在2020,2021上半年的完成数
tag,
SUM(IF(DATE_FORMAT(submit_time, '%Y-%m') BETWEEN '2020-01' AND '2020-06', 1, 0)) exam_cnt_20,
SUM(IF(DATE_FORMAT(submit_time, '%Y-%m') BETWEEN '2021-01' AND '2021-06', 1, 0)) exam_cnt_21
FROM exam_record
LEFT JOIN examination_info ei USING(exam_id)
GROUP BY 1) t1
) t2
WHERE exam_cnt_20 != 0 AND exam_cnt_21 != 0 # 第四步:筛选出2020和2021年均有完成记录的tag,并按题目要求排序
ORDER BY 4 desc, 6 desc
方法二
select tag,
exam_cnt_20,
exam_cnt_21,
concat(round((exam_cnt_21-exam_cnt_20)*100/exam_cnt_20,1),'%') as growth_rate,
exam_cnt_rank_20,
exam_cnt_rank_21,
cast(exam_cnt_rank_21 as signed) - CAST(exam_cnt_rank_20 as signed) as rank_delta
from (
select
exam_id,
count(submit_time) as exam_cnt_20,
rank()over(order by count(submit_time) desc) exam_cnt_rank_20
from exam_record
where year(submit_time)=2020 and month(submit_time) <=6
group by exam_id ) as a
join
(select exam_id,
count(submit_time) as exam_cnt_21,
rank()over(order by count(submit_time) desc) exam_cnt_rank_21
from exam_record
where year(submit_time)=2021 and month(submit_time) <=6
group by exam_id ) as b
using(exam_id)
join examination_info using(exam_id)
order by growth_rate desc, exam_cnt_rank_21 desc
聚和窗口函数
SQL142 对试卷得分做min-max归一化
select uid,exam_id,
round(avg(max_min),0) as avg_new_score
from (
select uid,exam_id,
if(min_score=max_score,score,(score-min_score)*100/(max_score-min_score)) as max_min
from (
select uid,exam_id,score,
min(score)over(partition by exam_id) as min_score,
max(score)over(partition by exam_id) as max_score
from exam_record
join examination_info using(exam_id)
where difficulty='hard' and score is not null
) a
) b
group by exam_id,uid
order by exam_id, avg_new_score desc
SQL143 每份试卷每月作答数和截止当月的作答总数
(聚合窗口函数中,over()的括号中有order by 时,即为计算到当前时间为止的累计数量)
select
exam_id,
date_format(start_time,'%Y%m') as start_month,
count(start_time) as month_cnt,
sum(count(start_time))over(partition by exam_id order by date_format(start_time,'%Y%m')) as cum_exam_cnt
from exam_record
group by exam_id,start_month
SQL144 每月及截止当月的答题情况
(第12行if语句是用来确定是否为新用户,如果这个用户做题时间是最小做题时间,那么这个用户就是新用户。另外,如果不用partition by的话,只能找到那一位最早做题的用户,不能找到新用户。)
select
start_month,
count(distinct uid) as mau,
sum(new_user) as month_add_uv,
max(sum(new_user))over(order by start_month) as max_month_add_uv,
sum(sum(new_user))over(order by start_month) as cum_sum_uv
from (
select *,
date_format(start_time,'%Y%m') as start_month,
if(start_time=min(start_time)over(partition by uid),1,0) as new_user
from exam_record
) a
group by start_month
order by start_month
6.其他常用操作
空值处理
SQL145 统计有未完成状态的试卷的未完成数和未完成率
(题目要求要有未完成状态的试卷,所以要在最后加上having,否则会输出9002这些没有未完成状态的试卷)
select
exam_id,
sum(if (score is null,1,0)) as incomplete_cnt,
round(sum(if (score is null,1,0))/count(start_time),3) as complete_rate
from exam_record
group by exam_id
having incomplete_cnt <>0
SQL146 0级用户高难度试卷的平均用时和平均得分
select
uid,
round(avg(if(score is null,0,score)),0) as avg_score,
round(avg(if(submit_time is null,duration,timestampdiff(minute,start_time,submit_time))),1) as avg_time_took
from exam_record
join user_info using(uid)
join examination_info using(exam_id)
where `level`=0 and difficulty='hard'
group by uid
高级条件语句
SQL147 筛选限定昵称成就值活跃日期的用户
select
uid,
nick_name,
achievement
from user_info
where nick_name like '牛客%号'
and achievement between 1200 and 2500
and uid in (
select uid
from (
select uid,submit_time
from exam_record
union all
select uid,submit_time
from practice_record
) a
group by uid
having date_format(max(submit_time),'%Y%m')=202109
)
SQL148 筛选昵称规则和试卷规则的作答记录
select
uid,
exam_id,
round(avg(score) ,0) as avg_score
from exam_record
where uid in (
select uid
from user_info
where nick_name regexp "^牛客[0-9]+号$|^[0-9]+$"and submit_time is not null )
and exam_id in (
select exam_id
from examination_info
where tag regexp"^[Cc]" and submit_time is not null )
group by uid,exam_id
order by uid,avg_score
方法二
select
select uid,exam_id,round(sum(score)/count(exam_id),0) as avg_score
from exam_record
left join user_info using(uid)
left join examination_info using(exam_id)
where nick_name regexp '^牛客[0-9]+号$|^[0-9]+$'
and tag regexp '^[Cc]'
and submit_time is not NULL
group by uid,exam_id
order by 1,3
SQL149 根据指定记录是否存在输出不同的情况
(1. 题目中提到当有任意一个0级用户未完成试卷数大于2时,输出每个0级用户的试卷未完成数和未完成率,所以第一个where exists 用来判断是否存在有0级用户未完成试卷数大于2,如果不存在,输出false,如果存在,后面还要加一个 level=0过滤得到所有0级用户
2.题目中提到输出所有0级用户的信息,而不是所有有作答记录的0级用户的信息,所以a表中要用到right join,如果不用right join,就会只显示三个有答题记录的用户的信息,其他没有答题记录的0级用户就不显示)
with a as(
select uid,level,
count(start_time)-count(submit_time) as incomplete_cnt,
round(ifnull(1-count(submit_time)/count(start_time),0),3) as incomplete_rate,
count(start_time) as total_cnt
from exam_record
right join user_info using(uid)
group by uid)
select uid,incomplete_cnt,incomplete_rate
from a
where exists ( select incomplete_cnt,level from a where level=0 and incomplete_cnt>2)
and level=0
union all
select uid,incomplete_cnt,incomplete_rate
from a
where not exists ( select incomplete_cnt,level from a where level=0 and incomplete_cnt>2 )
and total_cnt>0
order by incomplete_rate
SQL150 各用户等级的不同得分表现占比
(* 1. count()在统计结果的时候,不会忽略列值为NULL;
count(列名)只包括列名那一列,在统计结果的时候,会忽略列值为空。
2. 本题中,如果用count(列名)则会报错only full group by,可能是因为在第二行用到了count这一含有partition by level的字段,与后面的group by level 相冲突,使这一行不依赖于后面group by的分组,不符合SQL中的语句规则。此时使用count(),不指代某一列,则可以解决这个问题)
select level,score_grade,
round(count(uid)/level_cnt,3) as ratio
from (
select level,score,uid,exam_id,
case when score>=90 then'优'
when score>=75 then '良'
when score>=60 then '中'
else'差' end as score_grade,
count(*)over(partition by level) as level_cnt
from exam_record
join user_info using(uid)
where score is not null
) as a
group by level,score_grade
order by level desc,ratio desc
限量查询
SQL151 注册时间最早的三个人
select uid,nick_name,register_time
from user_info
order by register_time
limit 3
SQL152 注册当天就完成了试卷的名单第三页
(limit x,y 表示从第x行开始取,取y行。在实际操作中,第一行是0,而不是1。举个例子:limit 1,1 表示的是从第二行开始取,取一行;limit 3 表示 取前三行)
select uid,
level,
register_time,
max(score) as max_score
from exam_record
join examination_info using(exam_id)
join user_info using(uid)
where job='算法' and tag='算法'
and timestampdiff(day,register_time,submit_time)=0
group by uid,exam_id
order by max_score desc
limit 6,3
文本转换函数
SQL153 修复串列了的记录
(1. substring_index(字符串, ‘分隔符’ , n),其中n表示搜索分隔符的次数,若为正,返回分隔符左侧的数字,若为负,返回分隔符右侧的数字)
select
exam_id,
substring_index(tag,',',1) as tag,
substring_index((substring_index(tag,',',2)),',',-1) as difficulty,
substring_index(tag,',',-1) as duratiom
from examination_info
where difficulty=''
SQL154 对过长的昵称截取处理
( 1. char_length()可以返回字符串的长度
2. substring(字符串,起始位置,提取字符数)length若为正,从开头提取,若为负,从末尾提取 )
select
uid,
if( char_length(nick_name)>13, concat(substring(nick_name,1,10),'...') , nick_name)
from user_info
where char_length(nick_name)>10
SQL155 大小写混乱时的筛选统计
(题目:试卷的类别tag可能出现大小写混乱的情况,请先筛选出试卷作答数小于3的类别tag,统计将其转换为大写后对应的原本试卷作答数。
如果转换后tag并没有发生变化,不输出该条结果。
1.筛选出试卷作答数小于3的类别tag:a.answer_cnt<3 ;
2.统计将其转换为大写后对应的原本试卷作答数:upper(a.tag)=b.tag;
3.如果转换后tag并没有发生变化,不输出该条结果:a.tag != b.tag;
4.本题难点在于两个表的连接。
最后一行的 on upper(a.tag)=b.tag , 使b表变成了只有tag为大写的表,因此,b表中的tag全是大写的,b表中的amswer_cnt也是只有大写tag对应的数量 ;
如果把第一行select后的a.tag换成b.tag 则会输出大写的tag 而不是小写的tag;
如果把第一行select后的b.answer_cnt换成a.answer_cnt 则会输出小写tag对应的数量,而不是大写的tag对应的数量;
为了便于理解,可参照下图思考:
)
select a.tag, b.answer_cnt
from (
select tag,count(start_time) as answer_cnt
from exam_record
join examination_info using(exam_id)
group by tag ) as a
join(
select tag,count(start_time) as answer_cnt
from exam_record
join examination_info using(exam_id)
group by tag ) as b
on upper(a.tag)=b.tag and a.answer_cnt<3 and a.tag != b.tag