sql exercise
1 ifnull:
IFNULL(if not null then, if null then)
select Ifnull(
(select distinct(salary)
from employee
order by salary desc
limit 1,1), null) as 'SecondHighestSalary'
2 inner join 可以 在on里面进行筛选
3 窗口函数
- over(order by xxx) 按照xxx排序进行累计,order by是个默认的开窗函数
- over(partition by xxx)按照xxx分区
- over(partition by xxx order by xx)按照xxx分区,并以xx排序
首先创建一张简单的表(CLIENT):
1.sum函数的用法
情况一:按照ID排序
SELECT NAME,
SUM(“ID”) OVER(ORDER BY “ID” )
FROM CLIENT;
实现逻辑:按照ID升序排序,第N行数据为:第N行相等排序值唯一时,累加第一行至第N-1行值,并加上第N行的数据作为第N行的最终值;第N行相等排序值不唯一时,累加第一行至第N-1行值,并加上第N行的数据*n(n为与第N行相同值的个数)数据作为第N行的最终值;
情况二:按照xx分区 – over(partition by xxx)
SELECT NAME,
SUM(“ID”) OVER(PARTITION BY NAME )
FROM CLIENT;
实现逻辑:某个分区的值:按照 NAME 分区,将与NAME相同分区下的值求和;
情况三:按照xxx分区,并以xx排序
over(partition by xxx order by xx)
SELECT NAME,
SUM(“ID”) OVER(PARTITION BY NAME ORDER BY “ID” )
FROM CLIENT;
得到结果:
实现逻辑:如果理解了前两种实现逻辑,那么这种情况其实很容易理解,就是前两种的结合体。先按照情况二按照NAME分区,然后按照情况一处理分区内的数据即可;
情况四
over()
over中为空
SELECT name, SUM(“ID”) over()
FROM CLIENT;
实现逻辑:此时,其实就是对所有ID进行求和而已, 你可认为over()函数不存在一样处理得到值,不同的是,此时会有多条数据,而如果没有over的sum 仅有一条数据,这也正是over函数的一大特点;
2.first_value与last_value
select
user_id,
user_type,
ROW_NUMBER() OVER(PARTITION BY user_type ORDER BY sales) AS row_num, #从一开始按照分组记录数据
first_value(user_id) over (partition by user_type order by sales desc) as max_sales_user,
first_value(user_id) over (partition by user_type order by sales asc) as min_sales_user,
last_value(user_id) over (partition by user_type order by sales desc) as curr_last_min_user,
last_value(user_id) over (partition by user_type order by sales asc) as curr_last_max_user
from
order_detail;
2 RANK、ROW_NUMBER、DENSE_RANK
select
user_id,user_type,sales,
RANK() over (partition by user_type order by sales desc) as r,
ROW_NUMBER() over (partition by user_type order by sales desc) as rn,
DENSE_RANK() over (partition by user_type order by sales desc) as dr
from
order_detail;
RANK() 生成数据项在分组中的排名,排名相等会在名次中留下空位
DENSE_RANK() 生成数据项在分组中的排名,排名相等会在名次中不会留下空位
3.NTILE
select
user_type,sales,
--分组内将数据分成2片
NTILE(2) OVER(PARTITION BY user_type ORDER BY sales) AS nt2,
--分组内将数据分成3片
NTILE(3) OVER(PARTITION BY user_type ORDER BY sales) AS nt3,
--分组内将数据分成4片
NTILE(4) OVER(PARTITION BY user_type ORDER BY sales) AS nt4,
--将所有数据分成4片
NTILE(4) OVER(ORDER BY sales) AS all_nt4
from
order_detail
order by
user_type,
sales
NTILE(n) 用于将分组数据按照顺序切分成n片,返回当前切片值,如果切片不均匀,默认增加第一个切片的分布。NTILE不支持ROWS BETWEEN,比如 NTILE(2) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW)。
4.一些例子
求取sale前20%的用户ID
select
user_id
from
(select
user_id,
NTILE(5) OVER(ORDER BY sales desc) AS nt
from
order_detail
)A
where nt=1;
## CUME_DIST、PERCENT_RANK
select
user_id,user_type,sales,
--没有partition,所有数据均为1组
CUME_DIST() OVER(ORDER BY sales) AS cd1,
--按照user_type进行分组
CUME_DIST() OVER(PARTITION BY user_type ORDER BY sales) AS cd2
from
order_detail;