SQL> select * from emp;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- -------------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-12月-80 800 20
7499 ALLEN SALESMAN 7698 20-2月 -81 1600 300 30
7521 WARD SALESMAN 7698 22-2月 -81 1250 500 30
7566 JONES MANAGER 7839 02-4月 -81 2975 20
7654 MARTIN SALESMAN 7698 28-9月 -81 1250 1400 30
7698 BLAKE MANAGER 7839 01-5月 -81 2850 30
7782 CLARK MANAGER 7839 09-6月 -81 2450 10
7788 SCOTT ANALYST 7566 19-4月 -87 3000 20
7839 KING PRESIDENT 17-11月-81 5000 10
7844 TURNER SALESMAN 7698 08-9月 -81 1500 0 30
7876 ADAMS CLERK 7788 23-5月 -87 1100 20
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- -------------- ---------- ---------- ----------
7900 JAMES CLERK 7698 03-12月-81 950 30
7902 FORD ANALYST 7566 03-12月-81 3000 20
7934 MILLER CLERK 7782 23-1月 -82 1300 10
已选择14行。
SQL> select deptno,
2 ename,
3 sal,
4 sum(sal) over(order by ename) "连续求和",
5 sum(sal) over() "总和", -- 此处sum(sal) over () 等同于sum(sal)
round(sal / sum(sal) over(), 4) * 100 "份额(%)"
7 from emp;
DEPTNO ENAME SAL 连续求和 总和 份额(%)
---------- ---------- ---------- ---------- ---------- ----------
20 ADAMS 1100 1100 29025 3.79
30 ALLEN 1600 2700 29025 5.51
30 BLAKE 2850 5550 29025 9.82
10 CLARK 2450 8000 29025 8.44
20 FORD 3000 11000 29025 10.34
30 JAMES 950 11950 29025 3.27
20 JONES 2975 14925 29025 10.25
10 KING 5000 19925 29025 17.23
30 MARTIN 1250 21175 29025 4.31
10 MILLER 1300 22475 29025 4.48
20 SCOTT 3000 25475 29025 10.34
DEPTNO ENAME SAL 连续求和 总和 份额(%)
---------- ---------- ---------- ---------- ---------- ----------
20 SMITH 800 26275 29025 2.76
30 TURNER 1500 27775 29025 5.17
30 WARD 1250 29025 29025 4.31
已选择14行。
SQL> select deptno,
2 ename,
3 sal,
4 sum(sal) over(partition by deptno order by ename) "部门连续求和", --各部门的薪水"连续"求和
5 sum(sal) over(partition by deptno) "部门总和", -- 部门统计的总和,同一部门总和不变
6 round(sal / sum(sal) over(partition by deptno), 4) * 100 "部门份额(%)",
7 sum(sal) over(order by deptno, ename) "连续求和", --所有部门的薪水"连续"求和
8 sum(sal) over() "总和", -- 此处sum(sal) over () 等同于sum(sal),所有员工的薪水总和
9 round(sal / sum(sal) over(), 4) * 100 "总份额(%)"
10 from emp;
DEPTNO ENAME SAL 部门连续求和 部门总和 部门份额(%) 连续求和 总和 总份额(%)
---------- ---------- ---------- ------------ ---------- ----------- ---------- ---------- ----------
10 CLARK 2450 2450 8750 28 2450 29025 8.44
10 KING 5000 7450 8750 57.14 7450 29025 17.23
10 MILLER 1300 8750 8750 14.86 8750 29025 4.48
20 ADAMS 1100 1100 10875 10.11 9850 29025 3.79
20 FORD 3000 4100 10875 27.59 12850 29025 10.34
20 JONES 2975 7075 10875 27.36 15825 29025 10.25
20 SCOTT 3000 10075 10875 27.59 18825 29025 10.34
20 SMITH 800 10875 10875 7.36 19625 29025 2.76
30 ALLEN 1600 1600 9400 17.02 21225 29025 5.51
30 BLAKE 2850 4450 9400 30.32 24075 29025 9.82
30 JAMES 950 5400 9400 10.11 25025 29025 3.27
DEPTNO ENAME SAL 部门连续求和 部门总和 部门份额(%) 连续求和 总和 总份额(%)
---------- ---------- ---------- ------------ ---------- ----------- ---------- ---------- ----------
30 MARTIN 1250 6650 9400 13.3 26275 29025 4.31
30 TURNER 1500 8150 9400 15.96 27775 29025 5.17
30 WARD 1250 9400 9400 13.3 29025 29025 4.31
已选择14行。
SQL> select deptno,
2 ename,
3 sal,
4 sum(sal) over(partition by deptno order by sal) dept_sum,
sum(sal) over(order by deptno, sal) sum --求和规则有按部门分区的,有不分区的例子
6 from emp;
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
10 MILLER 1300 1300 1300
10 CLARK 2450 3750 3750
10 KING 5000 8750 8750
20 SMITH 800 800 9550
20 ADAMS 1100 1900 10650
20 JONES 2975 4875 13625
20 SCOTT 3000 10875 19625
20 FORD 3000 10875 19625
30 JAMES 950 950 20575
30 MARTIN 1250 3450 23075
30 WARD 1250 3450 23075
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
30 TURNER 1500 4950 24575
30 ALLEN 1600 6550 26175
30 BLAKE 2850 9400 29025
已选择14行。
SQL> select deptno,
2 ename,
3 sal,
4 sum(sal) over(partition by deptno order by deptno desc, sal desc) dept_sum,
sum(sal) over(order by deptno desc, sal desc) sum --逆序的,即部门从大到小排列,部门里各员工的薪水从高到低排列,累计和的规则不变。
6 from emp;
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
30 BLAKE 2850 2850 2850
30 ALLEN 1600 4450 4450
30 TURNER 1500 5950 5950
30 WARD 1250 8450 8450
30 MARTIN 1250 8450 8450
30 JAMES 950 9400 9400
20 FORD 3000 6000 15400
20 SCOTT 3000 6000 15400
20 JONES 2975 8975 18375
20 ADAMS 1100 10075 19475
20 SMITH 800 10875 20275
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
10 KING 5000 5000 25275
10 CLARK 2450 7450 27725
10 MILLER 1300 8750 29025
已选择14行。
SQL> select deptno,
2 ename,
3 sal,
sum(sal) over(partition by deptno order by sal) dept_sum,
sum(sal) over(order by deptno, sal) sum
6 from emp
7 order by deptno desc; --如果再在句尾添加排序子句,一致倒罢了,不一致,结果就令人费劲了。
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
30 JAMES 950 950 20575
30 MARTIN 1250 3450 23075
30 WARD 1250 3450 23075
30 TURNER 1500 4950 24575
30 ALLEN 1600 6550 26175
30 BLAKE 2850 9400 29025
20 SMITH 800 800 9550
20 ADAMS 1100 1900 10650
20 JONES 2975 4875 13625
20 FORD 3000 10875 19625
20 SCOTT 3000 10875 19625
DEPTNO ENAME SAL DEPT_SUM SUM
---------- ---------- ---------- ---------- ----------
10 MILLER 1300 1300 1300
10 CLARK 2450 3750 3750
10 KING 5000 8750 8750
已选择14行。
下面这两条语句的输出结果是一样的,仔细体会一下:
SQL> select distinct deptno,
sum(sal) over(partition by deptno) "部门总和"
3 from emp;
DEPTNO 部门总和
---------- ----------
10 8750
20 10875
30 9400
SQL> select deptno,
sum(sum(sal)) over(partition by deptno) "部门总和"
3 from emp group by deptno;
DEPTNO 部门总和
---------- ----------
10 8750
20 10875
30 9400
简单例子:
下面我们通过一个实际的例子:按区域查找上一年度订单总额占区域订单总额20%以上的客户,来看看分析函数的应用。
1、测试环境:
create table orders_tmp(
CUST_NBR NUMBER(5) NOT NULL,
REGION_ID NUMBER(5) NOT NULL,
SALESPERSON_ID NUMBER(5) NOT NULL,
YEAR NUMBER(4) NOT NULL,
MONTH NUMBER(2) NOT NULL,
TOT_ORDERS NUMBER(7) NOT NULL,
TOT_SALES NUMBER(11,2) NOT NULL
);
2、测试数据:
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(11,7,11,2001,7,2,12204);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(4,5,4,2001,10,2,37802);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(7,6,7,2001,2,3,3750);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(10,6,8,2001,1,2,21691);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(10,6,7,2001,2,3,42624);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(15,7,12,2000,5,6,24);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(12,7,9,2000,6,2,50658);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(1,5,2,2000,3,2,44494);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(1,5,1,2000,9,2,74864);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(2,5,4,2000,3,2,35060);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(2,5,4,2000,4,4,6454);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(2,5,1,2000,10,4,35580);
insert into orders_tmp(CUST_NBR,REGION_ID,SALESPERSON_ID,YEAR,MONTH,TOT_ORDERS,TOT_SALES) values(4,5,4,2000,12,2,39190);
SQL> select * from orders_tmp;
CUST_NBR REGION_ID SALESPERSON_ID YEAR MONTH TOT_ORDERS TOT_SALES
---------- ---------- -------------- ---------- ---------- ---------- ----------
11 7 11 2001 7 2 12204
4 5 4 2001 10 2 37802
7 6 7 2001 2 3 3750
10 6 8 2001 1 2 21691
10 6 7 2001 2 3 42624
15 7 12 2000 5 6 24
12 7 9 2000 6 2 50658
1 5 2 2000 3 2 44494
1 5 1 2000 9 2 74864
2 5 4 2000 3 2 35060
2 5 4 2000 4 4 6454
CUST_NBR REGION_ID SALESPERSON_ID YEAR MONTH TOT_ORDERS TOT_SALES
---------- ---------- -------------- ---------- ---------- ---------- ----------
2 5 1 2000 10 4 35580
4 5 4 2000 12 2 39190
已选择13行。
3、测试语句
SQL> select o.cust_nbr customer,
2 o.region_id region,
3 sum(o.tot_sales) cust_sales,
4 sum(sum(o.tot_sales)) over(partition by o.region_id) region_sales --(1)
5 from orders_tmp o
6 where o.year = 2001
7 group by o.region_id, o.cust_nbr; --(2)
CUSTOMER REGION CUST_SALES REGION_SALES
---------- ---------- ---------- ------------
4 5 37802 37802
7 6 3750 68065
10 6 64315 68065
11 7 12204 12204
分析:请注意上面的(1)和(2),group by的意图很明显:将数据按区域ID,客户进行分组,那么over这一部分有什么用呢?假如我们只需要统计每个区域每个客户的订单总额,那么我们只需要group byo.region_id,o.cust_nbr就够了。但我们还想在每一行显示该客户所在区域的订单总额,这一点和前面的不同:需要在前面分组的基础上按区域累加。很显然group by和sum是无法做到这一点的(因为聚集操作的级别不一样,前者是对一个客户,后者是对一批客户)。
这就是over函数的作用了!它的作用是告诉SQL引擎:按区域对数据进行分区,然后累积每个区域每个客户的订单总额(sum(sum(o.tot_sales)))。
现在我们已经知道2001年度每个客户及其对应区域的订单总额,那么下面就是筛选那些个人订单总额占到区域订单总额20%以上的大客户了:
SQL> select *
2 from (select o.cust_nbr customer,
3 o.region_id region,
4 sum(o.tot_sales) cust_sales,
5 sum(sum(o.tot_sales)) over(partition by o.region_id) region_sales
6 from orders_tmp o
7 where o.year = 2001
8 group by o.region_id, o.cust_nbr) all_sales
9 where all_sales.cust_sales > all_sales.region_sales * 0.2;
CUSTOMER REGION CUST_SALES REGION_SALES
---------- ---------- ---------- ------------
4 5 37802 37802
10 6 64315 68065
11 7 12204 12204
现在我们已经知道这些大客户是谁了!哦,不过这还不够,如果我们想要知道每个大客户所占的订单比例呢?看看下面的SQL语句,只需要一个简单的Round函数就搞定了。
SQL> select all_sales.*,
round(all_sales.cust_sales / all_sales.region_sales, 4) * 100 || '%' Percent
3 from (select o.cust_nbr customer,
4 o.region_id region,
sum(o.tot_sales) cust_sales,
6 sum(sum(o.tot_sales)) over(partition by o.region_id) region_sales
7 from orders_tmp o
8 where o.year = 2001
9 group by o.region_id, o.cust_nbr) all_sales
10 where all_sales.cust_sales > all_sales.region_sales * 0.2;
CUSTOMER REGION CUST_SALES REGION_SALES PERCENT
---------- ---------- ---------- ------------ -----------------------------------------
4 5 37802 37802 100%
10 6 64315 68065 94.49%
11 7 12204 12204 100%
总结:①Over函数指明在那些字段上做分析,其内跟Partition by表示对数据进行分组。注意Partition by可以有多个字段。
②Over函数可以和其它聚集函数、分析函数搭配,起到不同的作用。例如这里的SUM,max,min,avg,还有诸如Rank,Dense_rank等。
SQL> select deptno,
2 ename,
3 sal,
4 sum(sal) over(partition by deptno) "部门总和",
5 sum(sal) over() "所有总和",
6 max(sal) over(partition by deptno) "部门最大",
7 max(sal) over() "所有最大",
8 min(sal) over(partition by deptno) "部门最小",
9 min(sal) over() "所有最小",
10 round(avg(sal) over(partition by deptno),2) "部门平均",
11 round(avg(sal) over(),2) "所有平均"
12 from emp;
DEPTNO ENAME SAL 部门总和 所有总和 部门最大 所有最大 部门最小 所有最小 部门平均 所有平均
---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
10 CLARK 2450 8750 29025 5000 5000 1300 800 2916.67 2073.21
10 KING 5000 8750 29025 5000 5000 1300 800 2916.67 2073.21
10 MILLER 1300 8750 29025 5000 5000 1300 800 2916.67 2073.21
20 JONES 2975 10875 29025 3000 5000 800 800 2175 2073.21
20 FORD 3000 10875 29025 3000 5000 800 800 2175 2073.21
20 ADAMS 1100 10875 29025 3000 5000 800 800 2175 2073.21
20 SMITH 800 10875 29025 3000 5000 800 800 2175 2073.21
20 SCOTT 3000 10875 29025 3000 5000 800 800 2175 2073.21
30 WARD 1250 9400 29025 2850 5000 950 800 1566.67 2073.21
30 TURNER 1500 9400 29025 2850 5000 950 800 1566.67 2073.21
30 ALLEN 1600 9400 29025 2850 5000 950 800 1566.67 2073.21
DEPTNO ENAME SAL 部门总和 所有总和 部门最大 所有最大 部门最小 所有最小 部门平均 所有平均
---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
30 JAMES 950 9400 29025 2850 5000 950 800 1566.67 2073.21
30 BLAKE 2850 9400 29025 2850 5000 950 800 1566.67 2073.21
30 MARTIN 1250 9400 29025 2850 5000 950 800 1566.67 2073.21
已选择14行。