文章目录
一、基础查询
# Device id, gender, age and university of every user.
select
    device_id,
    gender,
    age,
    university
from user_profile;

# Every column of every user.
select *
from user_profile;

# Each university listed once.
select distinct university
from user_profile;

# At most two device ids, starting at offset 0.
select device_id
from user_profile
limit 0, 2;

# Same rows as the previous query, with the column exposed as user_infos_example.
select device_id as user_infos_example
from user_profile
limit 0, 2;
二、条件查询
1.基础排序
# Device id and age, youngest first.
select
    device_id,
    age
from user_profile
order by age asc;

# Sort by gpa ascending, breaking ties by age ascending.
select
    device_id,
    gpa,
    age
from user_profile
order by gpa asc, age asc;

# Sort by gpa descending, breaking ties by age descending.
select
    device_id,
    gpa,
    age
from user_profile
order by gpa desc, age desc;
2.基础操作符
# Users whose university is '北京大学'.
select
    device_id,
    university
from user_profile
where university = '北京大学';

# Users older than 24.
select
    device_id,
    gender,
    age,
    university
from user_profile
where age > 24;

# Users aged 20 to 23, both bounds included.
select
    device_id,
    gender,
    age
from user_profile
where age >= 20
  and age <= 23;

# Users whose university is anything other than '复旦大学'.
# Equivalent predicates:
#   where university != '复旦大学'
#   where university not in ('复旦大学')
select
    device_id,
    gender,
    age,
    university
from user_profile
where university <> '复旦大学';
# Keep only users whose age is known.
# `is not null` is the explicit NULL test. The previous `age != ''` only removed
# NULLs as a side effect of three-valued logic and relied on implicit type
# coercion; on a numeric column '' is compared as 0, so age = 0 would be
# dropped as well.
select
    device_id,
    gender,
    age,
    university
from user_profile
where age is not null;
3.高级操作符
# Male users with a gpa above 3.5.
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where gender = 'male'
  and gpa > 3.5;

# Users at '北京大学', plus anyone with a gpa above 3.7.
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where gpa > 3.7
   or university = '北京大学';

# Users at any of the three listed universities.
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where university in ('北京大学', '复旦大学', '山东大学');

# A different gpa threshold per university: above 3.5 at '山东大学', above 3.8 at '复旦大学'.
select
    device_id,
    gender,
    age,
    university,
    gpa
from user_profile
where (university = '山东大学' and gpa > 3.5)
   or (university = '复旦大学' and gpa > 3.8);

# Users whose university name contains '北京'.
select
    device_id,
    age,
    university
from user_profile
where university like '%北京%';
三、高级查询
1.计算函数
# Highest gpa among users at '复旦大学'.
select max(gpa)
from user_profile
where university = '复旦大学';

# Number of male users and their mean gpa.
select
    count(gender) as male_num,
    avg(gpa)
from user_profile
where gender = 'male';
2.分组查询
# Per (gender, university): user count, mean active days within 30 days and
# mean number of questions.
select
    gender,
    university,
    count(gender) as user_num,
    avg(active_days_within_30) as avg_active_day,
    avg(question_cnt) as avg_question_cnt
from user_profile
group by
    gender,
    university;

# Universities whose mean question count is below 5 or whose mean answer count
# is below 20. The having clause refers to the select-list aliases, which MySQL
# permits.
select
    university,
    avg(question_cnt) as avg_question_cnt,
    avg(answer_cnt) as avg_answer_cnt
from user_profile
group by university
having avg_question_cnt < 5
    or avg_answer_cnt < 20;

# Mean question count per university, smallest first.
select
    university,
    avg(question_cnt) as avg_question_cnt
from user_profile
group by university
order by avg_question_cnt asc;
四、多表查询
1.子查询
# Practice records (question id and result) of users at '浙江大学',
# ordered by question id.
select
    up.device_id,
    qpd.question_id,
    qpd.result
from user_profile as up
inner join question_practice_detail as qpd
    on qpd.device_id = up.device_id
where up.university = '浙江大学'
order by qpd.question_id;
2.连接查询
# Per university: practice records divided by the number of distinct users who
# practiced. The inner join leaves out users with no practice records.
select
    up.university,
    count(qpd.question_id) / count(distinct up.device_id) as avg_answer_cnt
from user_profile as up
inner join question_practice_detail as qpd
    on up.device_id = qpd.device_id
group by up.university;
# Per (university, difficulty): practice records divided by the number of
# distinct users who practiced.
# The ratio is now aliased avg_answer_cnt, the same name the university-only
# version of this query uses; previously the column header was the raw expression.
# question_detail is left-joined, so practice rows whose question has no detail
# row are kept and grouped under a NULL difficult_level.
select
    up.university,
    qd.difficult_level,
    count(qpd.question_id) / count(distinct up.device_id) as avg_answer_cnt
from question_practice_detail as qpd
inner join user_profile as up
    on up.device_id = qpd.device_id
left join question_detail as qd
    on qd.question_id = qpd.question_id
group by
    up.university,
    qd.difficult_level;

# Same statistic restricted to users at '山东大学'.
select
    up.university,
    qd.difficult_level,
    count(qpd.question_id) / count(distinct up.device_id) as avg_answer_cnt
from question_practice_detail as qpd
inner join user_profile as up
    on up.device_id = qpd.device_id
left join question_detail as qd
    on qd.question_id = qpd.question_id
where up.university = '山东大学'
group by
    up.university,
    qd.difficult_level;
3.组合查询
# Users at '山东大学' combined with all male users in one result set.
# `union all` keeps a row twice when it matches both filters; plain `union`
# would de-duplicate the result.
select
    device_id,
    gender,
    age,
    gpa
from user_profile
where university = '山东大学'
union all
select
    device_id,
    gender,
    age,
    gpa
from user_profile
where gender = 'male';
五、必会的常用函数
1.条件函数
# Count users in two age buckets. A NULL age fails the `>= 25` test and
# therefore lands in the else bucket.
select
    case
        when age >= 25 then '25岁及以上'
        else '25岁以下'
    end as age_cut,
    count(device_id)
from user_profile
group by age_cut;

# Label each user with an age bucket. The three ranges do not overlap, so the
# branch order does not affect the result; anything matching none of them
# (including a NULL age) is labelled '其他'.
select
    device_id,
    gender,
    case
        when age < 20 then '20岁以下'
        when age <= 24 then '20-24岁'
        when age >= 25 then '25岁及以上'
        else '其他'
    end as age_cut
from user_profile;
2.日期函数
# Number of practiced questions per day of month in August 2021.
select
    day(date) as day,
    count(question_id) as question_cnt
from question_practice_detail
where month(date) = 8
  and year(date) = 2021
group by day;

# Next-day retention: of all distinct (device, date) practice days, the share
# that were followed by practice on the next calendar day.
# datediff(q1.date, q2.date) = 1 makes q2 the day before q1, so every distinct
# (q2.device_id, q2.date) is a day whose user came back the following day.
# The self-join repeats pairs, hence count(distinct ...) on both sides.
select
    count(distinct q2.device_id, q2.date) / count(distinct q1.device_id, q1.date)
from question_practice_detail as q1
left join question_practice_detail as q2
    on q2.device_id = q1.device_id
   and datediff(q1.date, q2.date) = 1;
- 单表自连接:连接条件 datediff(q1.date,q2.date)=1 使 q2 的日期比 q1 的日期早一天;这样连接出来的数据会有重复,所以在 select 子句中用 count(distinct q2.device_id,q2.date) 去重后再计数
- 下面是自连接的结果
# Raw rows of the self-join: each practice record (q1) paired with the same
# device's records from the previous day (q2). The q2 columns are NULL when the
# device has no record on the previous day.
select
    q1.*,
    q2.*
from question_practice_detail as q1
left join question_practice_detail as q2
    on q2.device_id = q1.device_id
   and datediff(q1.date, q2.date) = 1;
1|2138|111|wrong|2021-05-03|None|None|None|None|None
2|3214|112|wrong|2021-05-09|None|None|None|None|None
3|3214|113|wrong|2021-06-15|None|None|None|None|None
4|6543|111|right|2021-08-13|None|None|None|None|None
5|2315|115|right|2021-08-13|None|None|None|None|None
6|2315|116|right|2021-08-14|11|2315|115|right|2021-08-13
6|2315|116|right|2021-08-14|5|2315|115|right|2021-08-13
7|2315|117|wrong|2021-08-15|12|2315|116|right|2021-08-14
7|2315|117|wrong|2021-08-15|6|2315|116|right|2021-08-14
8|3214|112|wrong|2021-05-09|None|None|None|None|None
9|3214|113|wrong|2021-08-15|None|None|None|None|None
10|6543|111|right|2021-08-13|None|None|None|None|None
11|2315|115|right|2021-08-13|None|None|None|None|None
12|2315|116|right|2021-08-14|11|2315|115|right|2021-08-13
12|2315|116|right|2021-08-14|5|2315|115|right|2021-08-13
13|2315|117|wrong|2021-08-15|12|2315|116|right|2021-08-14
13|2315|117|wrong|2021-08-15|6|2315|116|right|2021-08-14
14|3214|112|wrong|2021-08-16|9|3214|113|wrong|2021-08-15
15|3214|113|wrong|2021-08-18|None|None|None|None|None
16|6543|111|right|2021-08-13|None|None|None|None|None
3.文本函数
# Count submissions per gender, taking gender as the last comma-separated
# field of profile.
select
    substring_index(profile, ',', -1) as gender,
    count(device_id) as number
from user_submit
group by gender;
substring_index(str,delim,count)
str 是要处理的字符串,delim 是分隔符,count 是计数:count 为正数时,返回从左数第 count 个分隔符左边的全部内容;count 为负数时,返回从右数第 |count| 个分隔符右边的全部内容(例如 -1 表示取最后一段)
# The user name is whatever follows the last '/' in blog_url.
select
    device_id,
    substring_index(blog_url, '/', -1) as user_name
from user_submit;
# Count submissions per age.
# Age is now extracted by field position (the third comma-separated item)
# instead of the fixed character offset substring(profile, 12, 2), which
# returns the wrong characters as soon as an earlier field changes width
# (for example a three-digit weight) or the age is not exactly two digits.
# NOTE(review): assumes profile is laid out as 'height,weight,age,gender' — confirm.
select
    substring_index(substring_index(profile, ',', 3), ',', -1) as age,
    count(device_id) as number
from user_submit
group by age;
SUBSTRING ( expression, start[, length]) 截取字符串
- expression:字符串、二进制字符串、文本、图像、列或包含列的表达式。请勿使用包含聚合函数的表达式。
- start:整数或可以隐式转换为int 的表达式,指定子字符串的开始位置。
- length:整数或可以隐式转换为 int 的表达式,指定子字符串的长度。
4.窗口函数
方法1
# Per university, the user(s) holding the lowest gpa.
# The derived table u2 computes each university's minimum gpa; joining back on
# (university, gpa) recovers the matching users. u2 is the preserved side of
# the outer join, so every university it contains appears in the output.
select
    u1.device_id,
    u2.university,
    u1.gpa
from (
    select
        university,
        min(gpa) as m_gpa
    from user_profile
    group by university
) as u2
left join user_profile as u1
    on u1.university = u2.university
   and u1.gpa = u2.m_gpa
order by u2.university;
方法2
# Per university, the user(s) with the lowest gpa, via a window function.
# rank() replaces row_number(): when several users tie for the lowest gpa,
# row_number() numbers them arbitrarily and only one of them survives the
# filter, so the result was non-deterministic and disagreed with the
# min(gpa) join approach. rank() gives every tied user rank 1.
# The derived table lists only the columns the outer query reads.
# NOTE(review): MySQL sorts NULL first in ascending order, so a NULL gpa would
# take rank 1 — confirm gpa is never NULL.
select
    device_id,
    university,
    gpa
from (
    select
        device_id,
        university,
        gpa,
        rank() over (partition by university order by gpa) as gpa_rank
    from user_profile
) as t
where gpa_rank = 1;
六、综合练习
# For each user at '复旦大学': how many questions they practiced in August 2021
# and how many of those were answered right.
# Both filters live inside the derived tables, so the left join still keeps
# users with no practice that month; they report 0 for both counts.
# university is now part of group by. Selecting it while grouping by device_id
# alone is not valid standard SQL and can be rejected by MySQL's
# ONLY_FULL_GROUP_BY mode. Every row of u has the same university, so the
# grouping result is unchanged.
select
    u.device_id,
    u.university,
    count(q.question_id) as question_cnt,
    sum(case when q.result = 'right' then 1 else 0 end) as right_question_cnt
from (
    select
        device_id,
        university
    from user_profile
    where university = '复旦大学'
) as u
left join (
    select
        device_id,
        question_id,
        result
    from question_practice_detail
    where year(date) = 2021
      and month(date) = 8
) as q
    on q.device_id = u.device_id
group by
    u.device_id,
    u.university;
- 表连接结果(这样连接,会把未答题的学生也取进来)
2|3214|male|None|复旦大学|4.000|15|5|25|15|3214|113|wrong|2021-08-18
2|3214|male|None|复旦大学|4.000|15|5|25|14|3214|112|wrong|2021-08-16
2|3214|male|None|复旦大学|4.000|15|5|25|9|3214|113|wrong|2021-08-15
7|4321|male|28|复旦大学|3.600|9|6|52|None|None|None|None|None
- 在连接表的时候就对表进行条件筛选,会更清晰
# Share of right answers per difficulty level for questions practiced by users
# at '浙江大学', lowest accuracy first.
# user_profile is joined with an explicit inner join: the where filter on
# t2.university already discarded practice rows without a matching user, so
# the former left join behaved as an inner join while reading as an outer one.
# question_detail stays a left join; practice rows whose question has no
# detail row are grouped under a NULL difficult_level.
select
    t3.difficult_level,
    sum(case when t1.result = 'right' then 1 else 0 end) / count(t1.question_id) as correct_rate
from question_practice_detail as t1
inner join user_profile as t2
    on t2.device_id = t1.device_id
left join question_detail as t3
    on t3.question_id = t1.question_id
where t2.university = '浙江大学'
group by t3.difficult_level
order by correct_rate;
- 表连接结果
5|2315|115|right|2021-08-13|4|2315|female|23|浙江大学|3.600|5|1|2|4|115|easy
6|2315|116|right|2021-08-14|4|2315|female|23|浙江大学|3.600|5|1|2|5|116|medium
7|2315|117|wrong|2021-08-15|4|2315|female|23|浙江大学|3.600|5|1|2|6|117|easy
11|2315|115|right|2021-08-13|4|2315|female|23|浙江大学|3.600|5|1|2|4|115|easy
12|2315|116|right|2021-08-14|4|2315|female|23|浙江大学|3.600|5|1|2|5|116|medium
13|2315|117|wrong|2021-08-15|4|2315|female|23|浙江大学|3.600|5|1|2|6|117|easy
# August 2021 totals: distinct devices that practiced, and practice records.
select
    count(distinct device_id) as did_cnt,
    count(question_id) as question_cnt
from question_practice_detail
where month(date) = 8
  and year(date) = 2021;