MySQL8新特性——窗口函数
https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
准备数据
CREATE TABLE `sales` (
`year` int(11) DEFAULT NULL,
`country` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
`product` varchar(32) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
`profit` int(11) DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
INSERT INTO `sales` VALUES (2000, 'Finland', 'Computer', 1500);
INSERT INTO `sales` VALUES (2000, 'Finland', 'Phone', 100);
INSERT INTO `sales` VALUES (2001, 'Finland', 'Phone', 10);
INSERT INTO `sales` VALUES (2000, 'India', 'Calculator', 75);
INSERT INTO `sales` VALUES (2000, 'India', 'Calculator', 75);
INSERT INTO `sales` VALUES (2000, 'India', 'Computer', 1200);
INSERT INTO `sales` VALUES (2000, 'USA', 'Calculator', 75);
INSERT INTO `sales` VALUES (2000, 'USA', 'Computer', 1500);
INSERT INTO `sales` VALUES (2001, 'USA', 'Calculator', 50);
INSERT INTO `sales` VALUES (2001, 'USA', 'Computer', 1500);
INSERT INTO `sales` VALUES (2001, 'USA', 'Computer', 1200);
INSERT INTO `sales` VALUES (2001, 'USA', 'TV', 150);
INSERT INTO `sales` VALUES (2001, 'USA', 'TV', 100);
函数使用
SELECT
year, country, product, profit,
SUM(profit) OVER() AS total_profit,
SUM(profit) OVER(PARTITION BY country) AS country_profit
FROM sales
ORDER BY country, year, product, profit;
使用OVER()将数据分组后,还可以调用之前的聚合函数获取结果。
AVG()
BIT_AND()
BIT_OR()
BIT_XOR()
COUNT()
JSON_ARRAYAGG()
JSON_OBJECTAGG()
MAX()
MIN()
STDDEV_POP(), STDDEV(), STD()
STDDEV_SAMP()
SUM()
VAR_POP(), VARIANCE()
VAR_SAMP()
SELECT
year, country, product, profit,
ROW_NUMBER() OVER(w) AS row_num1,
ROW_NUMBER() OVER(w ORDER BY year, product) AS row_num2
FROM sales WINDOW w as (PARTITION BY country);
可以用 WINDOW w as () 来定义之前 OVER() 中的内容,但是OVER() 中不可再使用之前定义用过的关键字。w为变量名,自己定义。这样w可以多次使用。
全部函数
窗口函数总体上可以分为序号函数、分布函数、前后函数、首尾函数和其他函数,如下表:
函数分类 | 函数 | 函数说明 | |
---|---|---|---|
1 | 序号函数 | ROW_NUMBER() | 顺序排序 |
2 | 序号函数 | RANK() | 并列排序,会跳过重复的序号,比如序号为1、1、3 |
3 | 序号函数 | DENSE_RANK() | 并列排序,不会跳过重复的序号,比如序号为1、1、2 |
4 | 分布函数 | PERCENT_RANK() | 等级值百分比 |
5 | 分布函数 | CUME_DIST() | 累积分布值 |
6 | 前后函数 | LAG(expr,n) | 返回当前行的前n行的expr的值 |
7 | 前后函数 | LEAD(expr,n) | 返回当前行的后n行的expr的值 |
8 | 首尾函数 | FIRST_VALUE(expr) | 返回第一个expr的值 |
9 | 首尾函数 | LAST_VALUE(expr) | 返回最后一个expr的值 |
10 | 其他函数 | NTH_VALUE(expr,n) | 返回第n个expr的值 |
11 | 其他函数 | NTILE(n) | 将分区中的有序数据分为n个桶,记录桶编号 |
最全使用,方便分析
SELECT
year, country, product, profit,
ROW_NUMBER() OVER(w1 ORDER BY year, product) as rowNumber,
RANK() OVER(w1 ORDER BY year, product) as rank1,
DENSE_RANK() OVER(w1 ORDER BY year, product) as denseRank,
PERCENT_RANK() OVER(w1 ORDER BY year, product) as percentRank,
CUME_DIST() OVER(w1 ORDER BY year, product) as cumeDist,
LAG(profit,2) OVER(w1 ORDER BY year, product) as lag1,
LEAD(profit,2) OVER(w1 ORDER BY year, product) lead1,
FIRST_VALUE(profit) OVER(w1 ORDER BY year, product) as firstValue,
LAST_VALUE(profit) OVER(w1 ORDER BY year, product) as lastValue,
NTH_VALUE(profit,2) OVER(w1 ORDER BY year, product) as nthValue,
NTILE(2) OVER(w1 ORDER BY year, product) as ntile1
FROM sales WINDOW w1 as (PARTITION BY country) ;