手头有个项目,要分析系统错误日志,根据分析结果,提出可能存在的问题。
数据量比较大(千万条记录),如果不使用分组函数各个击破,简直无处着手。
本节中的rollup, cube, grouping sets是第一次学习,对于统计报表的编程比较有用。
--TBC 2010-10-09
--9.4 分组查询
--9.4.1 理解分组查询
--分组查询,统计emp表中各个部门的员工人数
SELECT deptno as "部门编号", COUNT(*) as "部门人数"
FROM emp
GROUP BY deptno
ORDER BY 部门人数;
--9.4.2 分组函数
--1. 记录条数统计
select count(*) from emp;
select count(*) from emp where deptno=20;
select count(comm) from emp;
select count(all comm) from emp;
select count(distinct comm) from emp;
select count(*) from emp where comm is null;
--2. 汇总和平均值计算
select sum(sal), sum(comm) from emp;--54311.08
select avg(sal), avg(comm) from emp;--2361.35
select 54311.08/2361.35 from dual;
select (select sum(comm) from emp)/(select count(*) from emp) 平均佣金_所有员工,
(select sum(comm) from emp)/(select count(comm) from emp) 平均佣金_佣金员工
from dual;
--3. 最小值和最大值
select min(sal), max(sal) from emp;
select min(hiredate),max(hiredate) from emp;
select min(ename),max(ename) from emp;
--4. 统计函数的null值处理
--实用nvl()函数转换null值
select min(nvl(comm,0)) 最低提成, max(nvl(comm,0)) 最高提成 from emp;
--9.4.3 使用group by子句分组
--用于分组的列可以 不出现在select语句中,反之则不然
--统计各个职位的员工人数
SELECT job as "职位", COUNT(empno) as "人数" FROM emp GROUP BY job;
--错误的GROUP BY语句用法,将会导致出现错误
SELECT job, deptno, COUNT(empno) as "人数" FROM emp GROUP BY job;
SELECT SUM(sal) 职位薪资小计, AVG(sal) 职位薪资平均值,job
FROM emp
GROUP BY job
ORDER BY SUM(sal);
--错误的语法,有分组函数但是没有GROUP BY子句
SELECT deptno, SUM(sal) 薪资小计 FROM emp;
--解决方法1:统计薪资综合
select SUM(sal) 薪资小计 FROM emp;
--解决方法2:加入分组条件
select deptno,SUM(sal) 薪资小计 FROM emp group by deptno order by 薪资小计 desc ;
--多列分组(这个比较实用)
SELECT deptno, job, COUNT(empno) as "员工人数"
FROM emp
GROUP BY deptno, job
ORDER BY 员工人数;
--分组中包含NULL列的结果
SELECT cate_id, COUNT(book_id)
FROM books
GROUP BY cate_id
ORDER BY cate_id NULLS LAST;
--9.4.4 使用having子句限制结果集
--使用WHERE过滤分组结果,错误的用法
SELECT deptno, AVG(sal) FROM emp WHERE AVG(sal) > 8000 GROUP BY deptno;
--使用HAVING过滤分组结果,正确的用法
SELECT deptno as "部门编号", AVG(sal) as "平均工资"
FROM emp
HAVING AVG(sal) > 2000
GROUP BY deptno;
--错误的HAVING子句的用法
--having子句的限制:只能取出现在选择列表或group by子句中的列表,否则报错
SELECT deptno as "部门编号", AVG(sal) as "平均工资"
FROM emp
GROUP BY deptno
HAVING job = '职员';
--9.4.5 使用Rollup和Cube(好新鲜的感觉!哈哈!)
--rollup是group by子句的一个扩展,用来创建分组中的小计和总几行
SELECT deptno, avg(sal) FROM emp GROUP BY ROLLUP(deptno);
SELECT deptno, job, avg(sal) FROM emp GROUP BY ROLLUP(deptno, job);
--使用ROLLUP显示小计和总计信息
SELECT deptno as "部门编号",
job as "职位",
(CASE
WHEN GROUPING(deptno) = 1 THEN
'总计:'
WHEN GROUPING(job) = 1 THEN
'职别小计:'
ELSE
''
END) as "统计栏",
SUM(sal) as "工资总数"
FROM emp
GROUP BY ROLLUP(deptno, job);
--使用CUBE进行纵向分组
SELECT deptno as "部门编号",
job as "职位",
(CASE
WHEN GROUPING(deptno) = 1 THEN
'总计:'
WHEN GROUPING(job) = 1 THEN
'职别小计:'
ELSE
''
END) as "统计栏",
SUM(sal) as "工资总数"
FROM emp
GROUP BY CUBE(deptno, job);
--9.4.6 使用grouping sets子句
--使用GROUPING SETS
--grouping sets子句扩展了group by子句,允许指定多个分组或移除由rollup或cube产生的不想要的分组
--注意:group by 和where 同时出现了!这个语句还待研究研究!
SELECT a.deptno as "部门编号",
a.job as "职位",
b.loc "部门地址",
COUNT(a.empno) as "员工人数"
FROM emp a, dept b
WHERE a.deptno = b.deptno
GROUP BY GROUPING SETS((a.deptno, a.job),(b.loc),());
--可以转换为如下所示的几个查询:
SELECT a.deptno as "部门编号",
a.job as "职位",
NULL as "部门地址",
COUNT(a.empno) as "员工人数"
FROM emp a, dept b
WHERE a.deptno = b.deptno
GROUP BY a.deptno, a.job
UNION ALL
SELECT NULL as "部门编号",
NULL as "职位",
b.loc as "部门地址",
COUNT(a.empno) as "员工人数"
FROM emp a, dept b
WHERE a.deptno = b.deptno
GROUP BY b.loc
UNION ALL
SELECT NULL as "部门编号",
NULL as "职位",
NULL as "部门地址",
COUNT(a.empno) as "员工人数"
FROM emp a, dept b
WHERE a.deptno = b.deptno;