创建测试数据
测试数据内容见:测试数据oil_ext.txt
CREATE TABLE t(
id int,
name varchar(20),
class int,
score int
);
INSERT INTO t VALUES(1,'math',1,90);
INSERT INTO t VALUES(2,'math',2,80);
INSERT INTO t VALUES(3,'math',1,70);
INSERT INTO t VALUES(4,'chinese',2,60);
INSERT INTO t VALUES(5,'chinese',1,50);
INSERT INTO t VALUES(6,'chinese',2,60);
INSERT INTO t VALUES(7,'physical',1,70);
INSERT INTO t VALUES(8,'physical',2,80);
INSERT INTO t VALUES(9,'physical',1,90);
SELECT
*
FROM
t;
id | name | class | score
----+----------+-------+-------
1 | math | 1 | 90
2 | math | 2 | 80
3 | math | 1 | 70
4 | chinese | 2 | 60
5 | chinese | 1 | 50
6 | chinese | 2 | 60
7 | physical | 1 | 70
8 | physical | 2 | 80
9 | physical | 1 | 90
(9 rows)
应用分组集
普通的GROUP BY
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
name,
class
ORDER BY
name,
class;
name | class | sum
----------+-------+-----
chinese | 1 | 50
chinese | 2 | 120
math | 1 | 160
math | 2 | 80
physical | 1 | 160
physical | 2 | 80
(6 rows)
GROUPING SETS
GROUPING SETS
的每个子列表可以指定零个或多个列或表达式,并且与其直接在GROUP BY
子句中的解释方式相同。一个空的分组集合意味着所有的行都被聚合到一个组中。
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
GROUPING SETS (
(name),
(class),
()
)
ORDER BY
name,
class;
name | class | sum
----------+-------+-----
chinese | | 170
math | | 240
physical | | 240
| 1 | 370
| 2 | 280
| | 650
(6 rows)
默认的GROUP BY
语句相当于在GROUPING SETS
后的参数填上所有GROUP BY
的字段:
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
GROUPING SETS (
(name, class)
)
ORDER BY
name,
class;
name | class | sum
----------+-------+-----
chinese | 1 | 50
chinese | 2 | 120
math | 1 | 160
math | 2 | 80
physical | 1 | 160
physical | 2 | 80
(6 rows)
与不使用GROUPING SETS
语句时的结果完全相同。
实际例子
在分组统计总和之外附加所有组的总和,命名为total,同时需要统计平均值。
SELECT
coalesce(name,'total') AS name, -- name为空时,使用字符串total
class,
sum(score),
round(avg(score),2) AS avg -- 保留两位小数
FROM
t
GROUP BY
GROUPING SETS (
(name, class),
()
)
ORDER BY
name,
class;
name | class | sum | avg
----------+-------+-----+-------
chinese | 1 | 50 | 50.00
chinese | 2 | 120 | 60.00
math | 1 | 160 | 80.00
math | 2 | 80 | 80.00
physical | 1 | 160 | 80.00
physical | 2 | 80 | 80.00
total | | 650 | 72.22
(7 rows)
ROLLUP
ROLLUP ((a), (b), (c))
等价于GROUPING SETS ((a, b, c), (a, b), (a), ())
。
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
ROLLUP (
(name),
(class)
)
ORDER BY
name,
class;
name | class | sum
----------+-------+-----
chinese | 1 | 50
chinese | 2 | 120
chinese | | 170
math | 1 | 160
math | 2 | 80
math | | 240
physical | 1 | 160
physical | 2 | 80
physical | | 240
| | 650
(10 rows)
等价于GROUPING SETS ((name, class), (name), ())
。
CUBE
CUBE ((a), (b), (c))
等价于GROUPING SETS ((a, b, c), (a, b), (a, c), (a), (b, c), (b), (c), ())
。
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
CUBE (
(name),
(class)
)
ORDER BY
name,
class;
name | class | sum
----------+-------+-----
chinese | 1 | 50
chinese | 2 | 120
chinese | | 170
math | 1 | 160
math | 2 | 80
math | | 240
physical | 1 | 160
physical | 2 | 80
physical | | 240
| 1 | 370
| 2 | 280
| | 650
(12 rows)
等价于GROUPING SETS ((name, class), (name), (class), ())
。
性能研究
EXPLAIN
SELECT
name,
class,
sum(score)
FROM
t
GROUP BY
GROUPING SETS (
(name),
(class),
()
)
ORDER BY
name,
class;
QUERY PLAN
--------------------------------------------------------------------
Sort (cost=1.85..1.88 rows=12 width=19)
Sort Key: name, class
-> GroupAggregate (cost=1.23..1.63 rows=12 width=19)
Group Key: name
Group Key: ()
Sort Key: class
Group Key: class
-> Sort (cost=1.23..1.26 rows=9 width=15)
Sort Key: name
-> Seq Scan on t (cost=0.00..1.09 rows=9 width=15)
(10 rows)
分组集是一种强大的特性,有助于减少昂贵的查询的数量。在内部,PostgreSQL基本上会把分组集转成传统的GroupAggregates来实现。GroupAggregates节点要求排序好的数据,因此要做好PostgreSQL可能会执行很多临时排序的准备。