hive之开窗函数

开窗函数结构:
分析函数+窗口子句

窗口子句
over(partition by ** order by ** rows between ** and **)
其中between ** and ** 之间可以填

起始结束解释
unbounded precedingcurrent row从第一行到当前行,这是默认的情况
3 precedingcurrent row从向前3行到当前行
3 preceding3 following当前行,向前3行,向后3行
current rowunbounded following从当前行到后面

分析函数包括

分析函数用法
sum(列名)有order by 的时候,计算的是分区排序后一个个叠加的值,不加order by 则为分区内的综合
min(列名)、max(列名)、avg(列名)与order by 有关
count(列名)与order by 有关
row_number()从1开始,按照顺序生成分组内记录的序列1,2,3,4,5.
rank()生成数据项在分组内的排名,排名相等会在名次中留下空位 1,2,3,3,5 ,先order by
dense_rank()生成数据项在分组内的排名,排名相等不会在名次中留下空位 1,2,3,3,4,先order by
ntile(n)将分组数据按照顺序切分成n片,返回当前切片值 1,2,…,n;切片不均匀,增加第一个切片分分布,如7行分成3份,则返回的切片值为1,1,1,2,2,3,3;不支持rows between,先order by
cum_dist()返回小于等于当前值的行数/分组内总行数,先order by
percent_rank()(分组内当前行的rank值-1)/(分组内总行数-1),先order by
lag(列名,向上n行,default 值)返回向上第n行的值,没有的话返回默认值或者null
lead(列名,向下n行,default 值)返回向下第n行的值,没有的话返回默认值或者null
first_value(列名)取分组内排序后,截至到当前行的第一个值,与order by 有关,与rows between 有关
last_value(列名)取分组排序后,截至到当前行的最后一个值,与order by 有关,与rows between 有关

练习1

练习题

创建表


-- CREATE TABLE IF NOT EXISTS student_scores(
-- id INT,
-- studentId INT,
-- language INT,
-- math INT,
-- english INT,
-- classId STRING,
-- departmentId STRING
-- );

插入数据


-- insert into table student_scores values 
--   (1,111,68,69,90,'class1','department1'),
--   (2,112,73,80,96,'class1','department1'),
--   (3,113,90,74,75,'class1','department1'),
--   (4,114,89,94,93,'class1','department1'),
--   (5,115,99,93,89,'class1','department1'),
--   (6,121,96,74,79,'class2','department1'),
--   (7,122,89,86,85,'class2','department1'),
--   (8,123,70,78,61,'class2','department1'),
--   (9,124,76,70,76,'class2','department1'),
--   (10,211,89,93,60,'class1','department2'),
--   (11,212,76,83,75,'class1','department2'),
--   (12,213,71,94,90,'class1','department2'),
--   (13,214,94,94,66,'class1','department2'),
--   (14,215,84,82,73,'class1','department2'),
--   (15,216,85,74,93,'class1','department2'),
--   (16,221,77,99,61,'class2','department2'),
--   (17,222,80,78,96,'class2','department2'),
--   (18,223,79,74,96,'class2','department2'),
--   (19,224,75,80,78,'class2','department2'),
--   (20,225,82,85,63,'class2','department2');

在这里插入图片描述

count()

SELECT studentid,math,departmentid,classid,
count(math) over() as count1,  
count(math) over(PARTITION BY classid) as count2,
count(math) OVER(PARTITION BY classid ORDER BY math ) as count3,
count(math) over(PARTITION BY classid ORDER BY math ROWS BETWEEN 1 PRECEDING and 2 FOLLOWING) as count4
FROM student_scores WHERE departmentid='department1'

在这里插入图片描述
解释:对于某一行,count1统计所有的行数,count2统计不同classid的行数,count3统计当前classid排序后小于等于当前值的行数,count4统计前一行后两行、当前行的总行数(以实际行数为准)

sum()

SELECT studentid,math,departmentid,classid,
sum(math) OVER() as sum1,
sum(math) over(PARTITION BY classid) as sum2,
sum(math) over(PARTITION BY classid order by math) as sum3,
sum(math) over(PARTITION BY classid ORDER BY math ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) as sum4
FROM student_scores
WHERE departmentid='department1'

在这里插入图片描述

min()

SELECT studentid,math,departmentid,classid,
min(math) over() as min1,
min(math) OVER(PARTITION BY classid) as min2,
min(math) OVER(PARTITION BY classid order by classid) as min3,
min(math) OVER(PARTITION BY classid order by classid ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) as min4
FROM student_scores
where departmentid='department1'

在这里插入图片描述

max()

SELECT studentid,math,departmentid,classid,
max(math) over() as max1,
max(math) over(PARTITION BY classid) as max2,
max(math) over(PARTITION BY classid order by math) as max3,
max(math) over(PARTITION BY classid order by math ROWS BETWEEN 1 PRECEDING and 2 FOLLOWING) as max4
FROM student_scores
where departmentid='department1'

在这里插入图片描述

avg()

SELECT studentid,math,departmentid,classid,
round(avg(math) over(),2) as max1,
round(avg(math) over(PARTITION BY classid) ,2) as max2,
round(avg(math) over(PARTITION BY classid order by math) ,2) as max3,
round(avg(math) over(PARTITION BY classid order by math ROWS BETWEEN 1 PRECEDING and 2 FOLLOWING) ,2) as max4
FROM student_scores
where departmentid='department1'

在这里插入图片描述

first_value()

SELECT studentid,math,departmentid,classid,
first_value(math) over() as first_value1,
first_value(math) OVER(PARTITION BY classid) as first_value2,
first_value(math) OVER(PARTITION BY classid ORDER BY math) as first_value3,
first_value(math) OVER(PARTITION BY classid ORDER BY math ROWS BETWEEN 1 PRECEDING and 2 FOLLOWING) as first_value4
FROM student_scores
where departmentid='department1'

在这里插入图片描述

last_value()

SELECT studentid,math,departmentid,classid,
last_value(math) over() as last_value1,
last_value(math) OVER(PARTITION BY classid) as last_value2,
last_value(math) OVER(PARTITION BY classid ORDER BY math) as last_value3,
last_value(math) OVER(PARTITION BY classid ORDER BY math ROWS BETWEEN 1 PRECEDING and 2 FOLLOWING) as last_value4
FROM student_scores
where departmentid='department1'

在这里插入图片描述

lag()

SELECT studentid,math,departmentid,classid,
lag(math,2,60) over(PARTITION BY classid order by math) as lag1,
lag(math,2) over(PARTITION BY classid order by math) as lag2
FROM student_scores
where departmentid='department1'

在这里插入图片描述

lead()

SELECT studentid,math,departmentid,classid,
lead(math,2,60) over(PARTITION BY classid order by math) as lead1,
lead(math,2) over(PARTITION BY classid order by math) as lead2
FROM student_scores
where departmentid='department1'

在这里插入图片描述

cume_dist()

SELECT studentid,math,departmentid,classid,
round(cume_dist() over(order by math),2) as cum_dist1,
round(cume_dist() OVER(order by math desc) ,2)as cum_dist2,
round(cume_dist() OVER(PARTITION BY classid order by math desc),2) as cum_dist3
FROM student_scores
where departmentid='department1'

在这里插入图片描述

rank()

SELECT studentid,math ,
rank() OVER(ORDER BY math) as rank1,
rank() OVER(PARTITION BY departmentid ORDER BY math) as rank2,
rank() over(PARTITION BY departmentid,classid ORDER BY math) as rank3
FROM student_scores

在这里插入图片描述

dense_rank()

SELECT studentid,math ,
dense_rank() OVER(ORDER BY math) as dense_rank1,
dense_rank() OVER(PARTITION BY departmentid ORDER BY math) as dense_rank2,
dense_rank() over(PARTITION BY departmentid,classid ORDER BY math) as dense_rank3
FROM student_scores

在这里插入图片描述

row_number()

SELECT studentid,departmentid,classid,math,
row_number() over(PARTITION BY departmentid,classid ORDER BY math) as row_number
FROM student_scores

在这里插入图片描述

percent_rank()

SELECT studentid,departmentid,classid,math,
row_number() over(PARTITION BY departmentid,classid ORDER BY math) as row_number1,
percent_rank() OVER(PARTITION BY departmentid,classid ORDER BY math) as percent_rank1
FROM student_scores

在这里插入图片描述

ntile(n)

SELECT studentid,math,
ntile(2) over(PARTITION BY departmentid order by math) as ntile1,
ntile(3) over(PARTITION BY departmentid order by math) as ntile2
FROM student_scores

在这里插入图片描述

练习2

练习2
创建并插入数据

-- INSERT INTO hivewindowtest1 VALUES

-- ('tony','2017-01-02',15),

-- ('jack','2017-02-03',23),

-- ('tony','2017-01-04',29),

-- ('jack','2017-01-05',46),

-- ('jack','2017-04-06',42),

-- ('tony','2017-01-07',50),

-- ('jack','2017-01-08',55),

-- ('mart','2017-04-08',62),

-- ('mart','2017-04-09',68),

-- ('neil','2017-05-10',12),

-- ('mart','2017-04-11',75),

-- ('neil','2017-06-12',80),

-- ('mart','2017-04-13',94);

– (1)查询在2017年4月份购买过的顾客及总人数

SELECT name,count(*) over()
FROM hivewindowtest1
where substring(orderdate,1,7)='2017-04'
group by name

在这里插入图片描述
– (2)查询顾客的购买明细及月购买总额

SELECT *,
sum(cost) OVER(PARTITION BY name,month(orderdate))
FROM hivewindowtest1

在这里插入图片描述

– (3)上述的场景,要将cost按照日期进行累加

SELECT *,
sum(cost) OVER(PARTITION BY name ORDER BY orderdate)
FROM hivewindowtest1

在这里插入图片描述
– (4)查询顾客上次的购买时间

SELECT *,
lag(orderdate,1) OVER(PARTITION BY name ORDER BY orderdate) as lastbuytime
FROM hivewindowtest1

– (5)查询前20%时间的订单信息

SELECT name,orderdate,cost from 
(SELECT name,orderdate,cost,
ntile(5) over(ORDER BY orderdate) as hh
FROM hivewindowtest1) TT
where TT.hh=1

在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值