1 基本的GROUP BY 用法
基本的GROUP BY
scott@orclpdb1:orclcdb> select d.dname,count(empno) empcount
2 from SCOTT.dept d
3 left outer join SCOTT.emp e on d.deptno = e.deptno
4 group by d.dname
5 order by d.dname;
DNAME EMPCOUNT
-------------- ----------
ACCOUNTING 2
OPERATIONS 0
RESEARCH 5
SALES 6
4 rows selected.
scott@orclpdb1:orclcdb>
GROUP BY 中所使用的列必须与SELECT 语句中没有使用聚合函数的列相匹配。
GROUP BY 子句中对于列的要求
scott@orclpdb1:orclcdb> select d.dname, d.loc, count(empno) empcount
2 from SCOTT.emp e
3 join SCOTT.Dept d
4 on d.deptno = e.deptno
5 group by d.dname;
select d.dname, d.loc, count(empno) empcount
*
ERROR at line 1:
ORA-00979: not a GROUP BY expression
scott@orclpdb1:orclcdb>
没有排序的GROUP BY
scott@orclpdb1:orclcdb> select deptno,count(*)
2 from emp
3 group by deptno;
DEPTNO COUNT(*)
---------- ----------
30 6
10 3
20 5
3 rows selected.
scott@orclpdb1:orclcdb>
复杂的SQL
scott@orclpdb1:orclcdb> select distinct dname,
2 decode(d.deptno,
3 10,
4 (select count(*) from emp where deptno = 10),
5 20,
(select count(*) from emp where deptno = 20),
7 30,
8 (select count(*) from emp where deptno = 30),
(select count(*)
10 from emp
11 where deptno not in (10, 20, 30))) dept_count
12 from (select distinct deptno from emp) d
13 join dept d2
14 on d2.deptno = d.deptno;
DNAME DEPT_COUNT
-------------- ----------
RESEARCH 5
ACCOUNTING 3
SALES 6
3 rows selected.
scott@orclpdb1:orclcdb>
GROUP BY 除了极大的简化必须写的SQL语句以外,GROUP BY 子句还消除了数据库不必要的IO.
不要低估GROUP BY子句的能力。GROUP BY 子句具有以下优点:
- 使SQL语句更具可读性。
- 书写起来比使用很多相关子查询更简单。
- 减少了重复访问同一个数据块的次数。(提升了性能)
HAVING 子句
GROUP BY 产生的结果可以通过HAVING子句给出的筛选标准来进行限制。
scott@orclpdb1:orclcdb> select d.dname, trunc(e.hiredate, 'YYYY') hiredate, count(empno) empcount
2 from SCOTT.EMP e
3 join scott.dept d
4 on d.deptno = e.deptno
5 group by d.dname, trunc(e.hiredate, 'YYYY')
6 having count(empno) >= 5 and trunc(e.hiredate, 'YYYY') between (select min(hiredate)
7 from scott.emp) and (select max(hiredate)
from scott.emp)
9 order by d.dname;
DNAME HIREDATE EMPCOUNT
-------------- ----------------------------- ----------
SALES 01-JAN-1981 00:00:00 6
1 row selected.
scott@orclpdb1:orclcdb>
GROUP BY 的CUBE扩展
HR.EMPLOYEES表的CUBE运算
hr@orclpdb1:orclcdb>
hr@orclpdb1:orclcdb> select last_name, first_name
2 from hr.employees
3 group by first_name, last_name;
LAST_NAME FIRST_NAME
------------------------- --------------------
Abel Ellen
Ande Sundar
Atkinson Mozhe
Austin David
Baer Hermann
Baida Shelli
Banda Amit
Bates Elizabeth
Bell Sarah
Bernstein David
Bissot Laura
Bloom Harrison
Bull Alexis
Cabrio Anthony
Cambrault Gerald
Cambrault Nanette
Chen John
Chung Kelly
Colmenares Karen
Davies Curtis
De Haan Lex
Dellinger Julia
Dilly Jennifer
Doran Louise
Ernst Bruce
Errazuriz Alberto
Everett Britney
Faviet Daniel
Fay Pat
Feeney Kevin
Fleaur Jean
Fox Tayler
Fripp Adam
Gates Timothy
Gee Ki
Geoni Girard
Gietz William
Grant Douglas
Grant Kimberely
Greenberg Nancy
Greene Danielle
Hall Peter
Hartstein Michael
Higgins Shelley
Himuro Guy
Hunold Alexander
Hutton Alyssa
Johnson Charles
Jones Vance
Kaufling Payam
Khoo Alexander
King Janette
King Steven
Kochhar Neena
Kumar Sundita
Ladwig Renske
Landry James
Lee David
Livingston Jack
Lorentz Diana
Mallin Jason
Markle Steven
Marlow James
Marvins Mattea
Matos Randall
Mavris Susan
McCain Samuel
McEwen Allan
Mikkilineni Irene
Mourgos Kevin
Nayer Julia
OConnell Donald
Olsen Christopher
Olson TJ
Ozer Lisa
Partners Karen
Pataballa Valli
Patel Joshua
Perkins Randall
Philtanker Hazel
Popp Luis
Rajs Trenna
Raphaely Den
Rogers Michael
Russell John
Sarchand Nandita
Sciarra Ismael
Seo John
Sewall Sarath
Smith Lindsey
Smith William
Stiles Stephen
Sullivan Martha
Sully Patrick
Taylor Jonathon
Taylor Winston
Tobias Sigal
Tucker Peter
Tuvault Oliver
Urman Jose Manuel
Vargas Peter
Vishney Clara
Vollman Shanta
Walsh Alana
Weiss Matthew
Whalen Jennifer
Zlotkey Eleni
107 rows selected.
hr@orclpdb1:orclcdb>
hr@orclpdb1:orclcdb> set autotrace on
hr@orclpdb1:orclcdb>
with emps as
2 (select last_name, first_name
from hr.employees
4 group by cube(first_name, last_name))
5 select rownum, last_name, first_name from emps;
ROWNUM LAST_NAME FIRST_NAME
---------- ------------------------- --------------------
1
2 Ki
3 TJ
4 Den
5 Guy
6 Lex
7 Pat
8 Adam
9 Amit
10 Jack
11 Jean
12 John
13 Lisa
14 Luis
15 Alana
16 Allan
17 Bruce
18 Clara
19 David
20 Diana
21 Eleni
22 Ellen
23 Hazel
24 Irene
25 James
26 Jason
27 Julia
28 Karen
29 Kelly
30 Kevin
31 Laura
32 Mozhe
33 Nancy
34 Neena
35 Payam
36 Peter
37 Sarah
38 Sigal
39 Susan
40 Valli
41 Vance
42 Alexis
43 Alyssa
44 Curtis
45 Daniel
46 Donald
47 Gerald
48 Girard
49 Ismael
50 Joshua
51 Louise
52 Martha
53 Mattea
54 Oliver
55 Renske
56 Samuel
57 Sarath
58 Shanta
59 Shelli
60 Steven
61 Sundar
62 Tayler
63 Trenna
64 Alberto
65 Anthony
66 Britney
67 Charles
68 Douglas
69 Hermann
70 Janette
71 Lindsey
72 Matthew
73 Michael
74 Nandita
75 Nanette
76 Patrick
77 Randall
78 Shelley
79 Stephen
80 Sundita
81 Timothy
82 William
83 Winston
84 Danielle
85 Harrison
86 Jennifer
87 Jonathon
88 Alexander
89 Elizabeth
90 Kimberely
91 Christopher
92 Jose Manuel
93 Fay
94 Fay Pat
95 Fox
96 Fox Tayler
97 Gee
98 Gee Ki
99 Lee
100 Lee David
101 Seo
102 Seo John
103 Abel
104 Abel Ellen
105 Ande
106 Ande Sundar
107 Baer
108 Baer Hermann
109 Bell
110 Bell Sarah
111 Bull
112 Bull Alexis
113 Chen
114 Chen John
115 Hall
116 Hall Peter
117 Khoo
118 Khoo Alexander
119 King
120 King Steven
121 King Janette
122 Ozer
123 Ozer Lisa
124 Popp
125 Popp Luis
126 Rajs
127 Rajs Trenna
128 Baida
129 Baida Shelli
130 Banda
131 Banda Amit
132 Bates
133 Bates Elizabeth
134 Bloom
135 Bloom Harrison
136 Chung
137 Chung Kelly
138 Dilly
139 Dilly Jennifer
140 Doran
141 Doran Louise
142 Ernst
143 Ernst Bruce
144 Fripp
145 Fripp Adam
146 Gates
147 Gates Timothy
148 Geoni
149 Geoni Girard
150 Gietz
151 Gietz William
152 Grant
153 Grant Douglas
154 Grant Kimberely
155 Jones
156 Jones Vance
157 Kumar
158 Kumar Sundita
159 Matos
160 Matos Randall
161 Nayer
162 Nayer Julia
163 Olsen
164 Olsen Christopher
165 Olson
166 Olson TJ
167 Patel
168 Patel Joshua
169 Smith
170 Smith Lindsey
171 Smith William
172 Sully
173 Sully Patrick
174 Urman
175 Urman Jose Manuel
176 Walsh
177 Walsh Alana
178 Weiss
179 Weiss Matthew
180 Austin
181 Austin David
182 Bissot
183 Bissot Laura
184 Cabrio
185 Cabrio Anthony
186 Davies
187 Davies Curtis
188 Faviet
189 Faviet Daniel
190 Feeney
191 Feeney Kevin
192 Fleaur
193 Fleaur Jean
194 Greene
195 Greene Danielle
196 Himuro
197 Himuro Guy
198 Hunold
199 Hunold Alexander
200 Hutton
201 Hutton Alyssa
202 Ladwig
203 Ladwig Renske
204 Landry
205 Landry James
206 Mallin
207 Mallin Jason
208 Markle
209 Markle Steven
210 Marlow
211 Marlow James
212 Mavris
213 Mavris Susan
214 McCain
215 McCain Samuel
216 McEwen
217 McEwen Allan
218 Rogers
219 Rogers Michael
220 Sewall
221 Sewall Sarath
222 Stiles
223 Stiles Stephen
224 Taylor
225 Taylor Winston
226 Taylor Jonathon
227 Tobias
228 Tobias Sigal
229 Tucker
230 Tucker Peter
231 Vargas
232 Vargas Peter
233 Whalen
234 Whalen Jennifer
235 De Haan
236 De Haan Lex
237 Everett
238 Everett Britney
239 Higgins
240 Higgins Shelley
241 Johnson
242 Johnson Charles
243 Kochhar
244 Kochhar Neena
245 Lorentz
246 Lorentz Diana
247 Marvins
248 Marvins Mattea
249 Mourgos
250 Mourgos Kevin
251 Perkins
252 Perkins Randall
253 Russell
254 Russell John
255 Sciarra
256 Sciarra Ismael
257 Tuvault
258 Tuvault Oliver
259 Vishney
260 Vishney Clara
261 Vollman
262 Vollman Shanta
263 Zlotkey
264 Zlotkey Eleni
265 Atkinson
266 Atkinson Mozhe
267 Kaufling
268 Kaufling Payam
269 OConnell
270 OConnell Donald
271 Partners
272 Partners Karen
273 Raphaely
274 Raphaely Den
275 Sarchand
276 Sarchand Nandita
277 Sullivan
278 Sullivan Martha
279 Bernstein
280 Bernstein David
281 Cambrault
282 Cambrault Gerald
283 Cambrault Nanette
284 Dellinger
285 Dellinger Julia
286 Errazuriz
287 Errazuriz Alberto
288 Greenberg
289 Greenberg Nancy
290 Hartstein
291 Hartstein Michael
292 Pataballa
293 Pataballa Valli
294 Colmenares
295 Colmenares Karen
296 Livingston
297 Livingston Jack
298 Philtanker
299 Philtanker Hazel
300 Mikkilineni
301 Mikkilineni Irene
301 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 237365549
----------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 107 | 2782 | 1 (0)| 00:00:01 |
| 1 | COUNT | | | | | |
| 2 | VIEW | | 107 | 2782 | 1 (0)| 00:00:01 |
| 3 | SORT GROUP BY | | 107 | 1605 | 1 (0)| 00:00:01 |
| 4 | GENERATE CUBE | | 107 | 1605 | 1 (0)| 00:00:01 |
| 5 | SORT GROUP BY NOSORT| | 107 | 1605 | 1 (0)| 00:00:01 |
| 6 | INDEX FULL SCAN | EMP_NAME_IX | 107 | 1605 | 1 (0)| 00:00:01 |
----------------------------------------------------------------------------------------
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
1 consistent gets
0 physical reads
0 redo size
9301 bytes sent via SQL*Net to client
735 bytes received via SQL*Net from client
22 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
301 rows processed
hr@orclpdb1:orclcdb>
预测CUBE返回行数
hr@orclpdb1:orclcdb> with counts as
2 (select count(distinct first_name) first_name_count,
3 count(distinct last_name) last_name_count,
4 count(distinct(first_name || last_name)) full_name_count
5 from hr.employees)
6 select first_name_count,
7 last_name_count,
8 full_name_count,
9 first_name_count + last_name_count + full_name_count + 1 total_count
10 from counts;
FIRST_NAME_COUNT LAST_NAME_COUNT FULL_NAME_COUNT TOTAL_COUNT
---------------- --------------- --------------- -----------
91 102 107 301
1 row selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 1750451867
---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 39 | 1 (0)| 00:00:01 |
| 1 | VIEW | | 1 | 39 | 1 (0)| 00:00:01 |
| 2 | SORT GROUP BY | | 1 | 15 | | |
| 3 | INDEX FULL SCAN| EMP_NAME_IX | 107 | 1605 | 1 (0)| 00:00:01 |
---------------------------------------------------------------------------------
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
1 consistent gets
0 physical reads
0 redo size
821 bytes sent via SQL*Net to client
963 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
1 rows processed
hr@orclpdb1:orclcdb>
CUBE扩展不仅仅是减少产生与UNION ALL和GROUP BY 组合同样的数据所需要的SQL语句,而且还会将索引全扫描的次数从4次减到1次。优化器选择使用EMP_EMAIL_UK索引而不是EMP_NAME_IX索引
hr@orclpdb1:orclcdb> with emps as
2 (select last_name, first_name from hr.employees),
3 mycube as
4 (select last_name, first_name
5 from emps
union all
7 select last_name, null first_name
8 from emps
9 union all
10 select null last_name, first_name
11 from emps
12 union all
13 select null last_name, null first_name
14 from emps)
select /*+ gather_plan_statistics*/
16 *
17 from mycube
18 group by last_name, first_name;
LAST_NAME FIRST_NAME
------------------------- --------------------
Doran Louise
Ernst Bruce
Gietz William
Jones Vance
Lorentz Diana
Nayer Julia
Ozer Lisa
Weiss Matthew
Austin
Banda
Bates
Bernstein
Bull
Davies
Everett
Khoo
Rogers
Seo
Taylor
Tobias
Amit
Curtis
Adam
Vance
Payam
Sundita
Randall
Christopher
Lisa
Stephen
Jonathon
Alana
Chung Kelly
Faviet Daniel
Greene Danielle
Hartstein Michael
Kaufling Payam
Kochhar Neena
Kumar Sundita
Mavris Susan
Pataballa Valli
Smith William
Sullivan Martha
Vollman Shanta
Atkinson
Baer
Bloom
Dellinger
Ernst
Errazuriz
Kaufling
Kochhar
Lee
Marlow
Marvins
McEwen
Nayer
Patel
Rajs
Walsh
Alberto
Timothy
William
Douglas
Alexander
Janette
Steven
Diana
Dilly Jennifer
Everett Britney
Fay Pat
Fox Tayler
Higgins Shelley
Himuro Guy
Ladwig Renske
Landry James
Mourgos Kevin
Olsen Christopher
Partners Karen
Rogers Michael
Smith Lindsey
Sully Patrick
Taylor Jonathon
Taylor Winston
Tobias Sigal
Bell
Cabrio
Faviet
Greene
Hutton
Jones
Livingston
Matos
Mavris
Olson
Perkins
Sewall
Stiles
Urman
Vishney
Mozhe
Hermann
Elizabeth
Laura
Gerald
Karen
Jennifer
Bruce
Britney
Pat
Tayler
Kimberely
Peter
Jason
Susan
Irene
Donald
Valli
Oliver
Eleni
Abel Ellen
Austin David
Banda Amit
Grant Douglas
Mallin Jason
Olson TJ
Patel Joshua
Philtanker Hazel
Tucker Peter
Vishney Clara
Colmenares
Fleaur
Fripp
King
Kumar
Landry
Olsen
Pataballa
Tuvault
Lex
Danielle
Renske
Trenna
Sarath
Matthew
Atkinson Mozhe
Bissot Laura
Cambrault Nanette
Gates Timothy
Greenberg Nancy
Marvins Mattea
Perkins Randall
Sarchand Nandita
Vargas Peter
Baida
Feeney
Fox
Gietz
McCain
Ozer
Popp
David
Alexis
Nanette
John
Louise
Ki
Guy
Joshua
Ismael
Clara
Ande Sundar
Cabrio Anthony
Errazuriz Alberto
Fripp Adam
Geoni Girard
Grant Kimberely
Hutton Alyssa
Khoo Alexander
Lee David
Matos Randall
McCain Samuel
OConnell Donald
Seo John
Sewall Sarath
Stiles Stephen
Ande
Dilly
Fay
Higgins
Himuro
Mikkilineni
Mourgos
OConnell
Raphaely
Russell
Anthony
Julia
Jean
Charles
Luis
Nandita
Bates Elizabeth
Bell Sarah
Bernstein David
Bloom Harrison
Bull Alexis
Cambrault Gerald
Davies Curtis
De Haan Lex
Hall Peter
Hunold Alexander
Livingston Jack
Raphaely Den
Sciarra Ismael
Tuvault Oliver
Urman Jose Manuel
Walsh Alana
Whalen Jennifer
Zlotkey Eleni
Bissot
Chung
Doran
Gee
Hall
Hunold
Ladwig
Lorentz
Mallin
Markle
Partners
Sciarra
Tucker
Vargas
Weiss
Ellen
Sundar
Harrison
Kevin
Girard
Shelley
Alyssa
Neena
Jack
Mattea
Samuel
TJ
Den
Lindsey
Martha
Winston
Jose Manuel
Baer Hermann
Baida Shelli
Chen John
Colmenares Karen
Dellinger Julia
Feeney Kevin
Fleaur Jean
Gee Ki
Johnson Charles
King Janette
King Steven
Markle Steven
Marlow James
McEwen Allan
Mikkilineni Irene
Popp Luis
Rajs Trenna
Russell John
Abel
Cambrault
Chen
De Haan
Gates
Geoni
Grant
Greenberg
Hartstein
Johnson
Philtanker
Sarchand
Smith
Sullivan
Sully
Vollman
Whalen
Zlotkey
Shelli
Sarah
Kelly
Daniel
Nancy
Michael
James
Allan
Hazel
Patrick
Sigal
Shanta
301 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 2717078830
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 428 | 11128 | 5 (20)| 00:00:01 |
| 1 | HASH GROUP BY | | 428 | 11128 | 5 (20)| 00:00:01 |
| 2 | VIEW | | 428 | 11128 | 4 (0)| 00:00:01 |
| 3 | UNION-ALL | | | | | |
| 4 | INDEX FULL SCAN| EMP_NAME_IX | 107 | 1605 | 1 (0)| 00:00:01 |
| 5 | INDEX FULL SCAN| EMP_NAME_IX | 107 | 856 | 1 (0)| 00:00:01 |
| 6 | INDEX FULL SCAN| EMP_NAME_IX | 107 | 749 | 1 (0)| 00:00:01 |
| 7 | INDEX FULL SCAN| EMP_EMAIL_UK | 107 | | 1 (0)| 00:00:01 |
-----------------------------------------------------------------------------------
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
8767 bytes sent via SQL*Net to client
989 bytes received via SQL*Net from client
22 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
301 rows processed
hr@orclpdb1:orclcdb>
CUBE的实际应用
无法显示,由于costs表中没有数据。
with tsales as
(select s.quantity_sold,
s.amount_sold,
to_char(mod(cust_year_of_birth, 10) * 10) || '-' ||
to_char((mod(cust_year_of_birth, 10) * 10) + 10) age_range,
nvl(c.cust_income_level, 'A: Below 30,000') cust_income_level,
p.prod_name,
p.prod_desc,
p.prod_category
-- (pf.unit_cost * s.quantity_sold) total_cost,
-- s.amount_sold - (pf.unit_cost * s.quantity_sold) profit
from sh.sales s
join sh.customers c
on c.cust_id = s.cust_id
join sh.products p
on p.prod_id = s.prod_id
join sh.times t
on t.time_id = s.time_id
--join sh.costs pf
-- on pf.channel_id = s.channel_id
--and pf.prod_id = s.prod_id
--and pf.promo_id = s.promo_id
--and pf.time_id = s.time_id
where (t.fiscal_year = 2001)),
gb as
(select -- Q1 - all categories by cust income and age range
'Q1' query_tag,
prod_category,
cust_income_level,
age_range--,
--sum(profit) profit
from tsales
group by prod_category, cust_income_level, age_range
union all
select -- Q2 - all categories by cust age range
'Q2' query_tag,
prod_category,
'ALL INCOME' cust_income_level,
age_range--,
--sum(profit) profit
from tsales
group by prod_category, 'ALL INCOME', age_range
union all
select -- Q3 - all categories by cust income
'Q3' query_tag,
prod_category,
cust_income_level,
'ALL AGE' age_range--,
--sum(profit) profit
from tsales
group by prod_category, cust_income_level, 'ALL AGE'
union all
select -- Q4 - all categories
'Q4' query_tag,
prod_category,
'ALL INCOME' cust_income_level,
'ALL AGE' age_range--,
--sum(profit) profit
from tsales
group by prod_category, 'ALL INCOME', 'ALL AGE')
select * from gb order by prod_category/*, profit*/;
用CUBE替代UNION ALL
with tsales as
(select /*+gather_plan_statistics*/
s.quantity_sold,
s.amount_sold,
to_char(mod(cust_year_of_birth, 10) * 10) || '-' ||
to_char((mod(cust_year_of_birth, 10) * 10) + 10) age_range,
nvl(c.cust_income_level, 'A: Below 30,000') cust_income_level,
p.prod_name,
p.prod_desc,
p.prod_category
-- (pf.unit_cost * s.quantity_sold) total_cost,
-- s.amount_sold - (pf.unit_cost * s.quantity_sold) profit
from sh.sales s
join sh.customers c
on c.cust_id = s.cust_id
join sh.products p
on p.prod_id = s.prod_id
join sh.times t
on t.time_id = s.time_id
--join sh.costs pf
-- on pf.channel_id = s.channel_id
--and pf.prod_id = s.prod_id
--and pf.promo_id = s.promo_id
--and pf.time_id = s.time_id
where (t.fiscal_year = 2001))
select 'Q' || decode(cust_income_level,
null,
decode(age_range, null, 4, 3),
decode(age_range, null, 2, 1)) query_tag,
prod_category,
cust_income_level,
age_range
--, sum(profit) profit
from tsales
group by prod_category, cube(cust_income_level, age_range)
order by prod_category /*,profit*/
;