WonderfulSQL-task6

最新推荐文章于 2024-07-20 19:58:01 发布

yxyibb

最新推荐文章于 2024-07-20 19:58:01 发布

阅读量112

点赞数

分类专栏：数据库文章标签： mysql 数据库

本文链接：https://blog.csdn.net/u012835414/article/details/119986436

版权

数据库专栏收录该内容

11 篇文章 0 订阅

订阅专栏

WonderfulSQL-task6

(Datawhale28期组队学习)

A

练习一: 各部门工资最高的员工（难度：中等）

#练习一: 各部门工资最高的员工（难度：中等）
-- # 1.max salary
-- select max(Salary) max_salary, DepartmentId 
-- from Employee
-- group by DepartmentId;

# 2.join department.name, employee.name
select d.name Department, e.name Employee, s.max_salary Salary
from department d
left join employee e 
on d.id = e.departmentid
left join(
	select max(Salary) max_salary, DepartmentId 
	from Employee
	group by DepartmentId
) s
on d.id = s.departmentid
where e.salary = s.max_salary
order by s.max_salary desc

要点:

先找出max(salary)->找到的，最大工资的新表
left join employee.name, department.name

练习二: 换座位（难度：中等）

select  case when(id%2 != 0 and id !=(select count(*) from seat)) then id+1
			 when(id%2 = 0) then id-1
	    else id
        end id,
        student
from seat
order by id

要点:

交换位置，奇数位id+1，偶数位id-1；
最后如果是奇数，不变
case条件语句

练习三: 分数排名（难度：中等）

#数据有问题，主要考察排序的窗口函数
# (1)rank
select id, score, rank() over(order by score ) score_avg
from score;
 
#(2)dense_rank
select id, score, dense_rank() over(order by score ) score_avg
from score;        

# (3)row_number
select id, score, row_number() over(order by score ) score_avg
from score;

要点:

rank
2. dense_rank
3. row_number

练习四：连续出现的数字（难度：中等）

#4 lianxu
select distinct l1.num consecutiveNums 
from logs l1, logs l2, logs l3 
where l1.num = l2.num
and l2.num = l3.num
and l1.id = l2.id +1
and l2.id = l3.id+1

要点:

连续的表，重复查三次，三次id均+1
重复查三次，看成查三个同样的表，用num相同来连接

练习五：树节点（难度：中等）

#id 在pid：
-- select distinct t1.id
-- from tree t1, tree t2
-- where t1.id = t2.p_id;

select id, (case when (select count(*) from tree) =1 then 'Root'
				when id not in (select distinct t1.id
								from tree t1, tree t2
								where t1.id = t2.p_id) then 'Leaf'
				when p_id is null then 'Root'
                else 'Inner'
                end) Type
from tree

要点:

root, 只有1个节点；pid=null;（注意只有一个节点的情况）
leaf, id 不在 pid;
inner,else的情况（注意排除所有root，leaf情况）；

在这里插入图片描述

练习六：至少有五名直接下属的经理（难度：中等）

# 6 count(distinct id) >=5
-- select ManagerId, count(Distinct id) cnt
-- from  Employee2
-- group by ManagerId
-- having cnt>=5;

select name
from Employee2
join (
	select ManagerId, count(Distinct id) cnt
	from  Employee2
	group by ManagerId
	having cnt>=5
) ee
on id = ee.managerid

要点:

先找到，在以managerid分组的情况下，计算count(distinct id) >=5的managerid
内连，通过 id = ee.managerid 找到manager的name

在这里插入图片描述

练习七：查询回答率最高的问题（难度：中等）

#1比例
select question_id, 
	(sum(case when answer_id is not null then 1 else 0 end)/sum(case when `action` like 'show' then 1 else 0 end)) rate
	from survey_log
#2groupby
select question_id, 
	(sum(case when answer_id is not null then 1 else 0 end)/sum(case when `action` like 'show' then 1 else 0 end)) rate
	from survey_log
	group by question_id
#3取最大
select question_id, 
	(sum(case when answer_id is not null then 1 else 0 end)/sum(case when `action` like 'show' then 1 else 0 end)) rate
from survey_log
group by question_id
order by rate desc
limit 1
#从新表中找出question_id
select question_id surver_log
from 
(
	select question_id, 
	(sum(case when answer_id is not null then 1 else 0 end)/sum(case when `action` like 'show' then 1 else 0 end)) rate
	from survey_log
	group by question_id
	order by rate desc
    limit 1
)ss

要点:

同一个问题出现的次数中回答的比例
根据question_id 分组，计算比例
逆序排序，取第一个即rate最大->找到新表
套一层表，取question_id(题目中只需要给出question_id，不需要rate

在这里插入图片描述

练习八：各部门前3高工资的员工（难度：中等）

select d.name department, a.name name, a.salary 
from department d,
	(SELECT name, salary ,departmentid,
	 Dense_rank() over(partition by departmentid order by salary desc) as ranking 
     from  employee9 ) a 
where a.departmentid = d.id 
and a.ranking <= 3

要点:

根据departmentid，按部门分组，用工资排序得到含有ranking的新表
新表与department表连接得到department.name
排序，ranking<=3

练习九：平面上最近距离 (难度: 困难）

select round(min(sqrt(power(p1.x -p2.x,2)+power(p1.y-p2.y,2))),2) shortest
from point_2d p1
left join point_2d p2
on (p1.x, p1.y) != (p2.x, p2.y)

要点:

计算即可

在这里插入图片描述

练习十：行程和用户（难度：困难）

-- # 非禁止用户：banned=no
-- (select status, request_at
-- from trips t
-- left join users u
-- on t.client_id = u.users_id 
-- where u.banned='no');

-- # 取消率：取消/all
-- round(sum(case temp.status when 'completed' then 0 else 1 end)/
-- count(temp.status),2)

#拼接
select temp.request_at Day,
	round(sum(case temp.status when 'completed' then 0 else 1 end)/
	count(temp.status),2) 'Cancellation Rate'
from 
	(select status, request_at
	 from trips t
	 left join users u
	 on t.client_id = u.users_id 
	 where u.banned='no') temp
where request_at between '2013-10-01' and '2013-10-03'
group by temp.request_at

要点:

题目比较长，复杂，需求分层分解问题！！！
先找到非禁止用户的订单，形成新表
计算取消率，新表中，取消的订单/all订单
日期，between ‘2013-10-01’ and ‘2013-10-03’

B

练习一：行转列

select name,
	max(case when subject ='chinese' then score else 0 end) as 'chinese',
	max(case when subject = 'math' then score else 0 end) as 'math',
	max(case when subject = 'english' then score else 0 end) as 'english'
from scores
group by name

##
SELECT *
FROM student
PIVOT (
    SUM(score) FOR subject IN (语文, 数学, 英语)
)

要点:

case when 语句，max()取巧

练习二：列转行

select name, 'chinese' subject, max('chinese') score
from scores_lie
group by name
union
select name, 'math' subject, max('math') score
from scores_lie
group by name
union
select name, 'english' subject, max('english') score
from scores_lie
group by name

要点:

case when 语句

练习三：带货主播

#star date
select  date, (case when date then 1 else 0 end) as date_num 
from anchor_sales
group by date
having max(sales)/sum(sales) >=0.9;

#star name
select ass.anchor_name, ass.date
from anchor_sales ass
inner join (
	select  date, sum(sales) as sum
	from anchor_sales
	group by date
	having max(sales)/sum(sales) >=0.9
) aa
on ass.date = aa.date
where ass.sales >= 0.9*aa.sum

要点:

计算的是当日销售额，要group by date
主播日，找到having约束条件下的date，有即记录
主播，要新表与原表连结找到对应的name，这里注意，单用date连结不够，要判断sales的值要>=0.9

在这里插入图片描述

练习四：MySQL 中如何查看sql语句的执行计划？可以看到哪些信息？

explain select * from <表名>;

id, select_type, table, partitions, type, possible_keys, key, key_len, ref, rows, filtered, Extra

练习五：解释一下 SQL 数据库中 ACID 是指什么

ACID，是指数据库管理系统（DBMS）在写入或更新资料的过程中，为保证事务（transaction）是正确可靠的，所必须具备的四个特性：原子性（atomicity，或称不可分割性）、一致性（consistency）、隔离性（isolation，又称独立性）、持久性（durability）

Atomicity（原子性）：一个事务（transaction）中的所有操作，要么全部完成，要么全部不完成，不会结束在中间某个环节。事务在执行过程中发生错误，会被恢复（Rollback）到事务开始前的状态，就像这个事务从来没有执行过一样。
Consistency（一致性）：在事务开始之前和事务结束以后，数据库的完整性没有被破坏。这表示写入的资料必须完全符合所有的预设规则，这包含资料的精确度、串联性以及后续数据库可以自发性地完成预定的工作。
Isolation（隔离性）：数据库允许多个并发事务同时对其数据进行读写和修改的能力，隔离性可以防止多个事务并发执行时由于交叉执行而导致数据的不一致。事务隔离分为不同级别，包括读未提交（Read uncommitted）、读提交（read committed）、可重复读（repeatable read）和串行化（Serializable）。
Durability（持久性）：事务处理结束后，对数据的修改就是永久的，即便系统故障也不会丢失。

C

练习一：行转列

参考B练习

create table competition
(cdate date,
result varchar(32));

insert into competition values ('2021-1-1', '胜'),
							   ('2021-1-1', '负'),
                               ('2021-1-3', '胜'),
                               ('2021-1-3', '负'),
                               ('2021-1-1', '胜'),
                               ('2021-1-3', '负');
                               
select cdate,
sum(case when result='胜' then 1 else 0 end) as '胜',
sum(case when result='负' then 1 else 0 end) as '负'
from competition
group by cdate；

练习二：列转行

参考B练习

select * from (SELECT 
    cdate, '胜' AS result
FROM competition1
UNION ALL SELECT 
    cdate, '负' AS result
FROM
    competition1) x
union all
select * from (SELECT 
    cdate, '胜' AS result
FROM competition1 as p1
where cdate='2021-1-1'
UNION ALL SELECT 
    cdate, '负' AS result
FROM
    competition1 
where cdate='2021-1-3') y
order by cdate, result;

练习三：连续登录

计算2021年每个月，每个用户连续登录的最多天数

#先求出每个用户在一段时间内的排名
select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
from t_act_records;

#根据rn列得出的结果,用当前日期减去rn,那么如果两天是连续的,那么相邻两行得到的日期应该相等
select uid, imp_date, date_sub(imp_date, interval rn day) as day_cha
from (select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
	  from t_act_records) a

#根据uid和day_cha分组,对day_char进行count,得到的day_continuous列就是该用户连续的天数
select uid, count(day_cha) day_continuous
from (select uid, imp_date, date_sub(imp_date, interval rn day) as day_cha
	  from (select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
			from t_act_records) a) b
group by uid, day_cha

#4.day_continuous列中找到每个用户最大的值
select uid, max(day_continuous) max_continuous_days
from(select uid, count(day_cha) day_continuous
	 from (select uid, imp_date, date_sub(imp_date, interval rn day) as day_cha
		   from (select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
				 from t_act_records) a) b
	 group by uid, day_cha) c
group by uid

在这里插入图片描述

计算2021年每个月，连续2天都有登录的用户名

select uid 
from(
	select uid, count(day_cha) day_continuous
	from (select uid, imp_date, date_sub(imp_date, interval rn day) as day_cha
		  from (select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
			    from t_act_records) a) b
group by uid, day_cha) c
where day_continuous > 1

在这里插入图片描述

计算2021年每个月，连续5天都有登录的用户数

select count(day_continuous)
from (select uid, count(day_cha) day_continuous
	  from (select uid, imp_date, date_sub(imp_date, interval rn day) as day_cha
		    from (select uid, imp_date, row_number() over(partition by uid order by imp_date) as rn
			      from t_act_records) a) b
      group by uid, day_cha) c
where day_continuous= 5

在这里插入图片描述

练习四：hive 数据倾斜的产生原因及优化策略？

1.key分布不均匀
2.业务数据本身的特性
3.SQL语句造成数据倾斜

参考

https://github.com/datawhalechina/wonderful-sql

yxyibb

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
WonderfulSQL-task6

WonderfulSQL-task6(Datawhale28期组队学习)练习一: 各部门工资最高的员工（难度：中等）#练习一: 各部门工资最高的员工（难度：中等）-- # 1.max salary-- select max(Salary) max_salary, DepartmentId -- from Employee-- group by DepartmentId;# 2.join department.name, employee.nameselect d.name Departm
复制链接

扫一扫