1、评级函数
(1). 排序rank()、dense_rank()
遇到重复的,rank()下一个加2,dense_rank() 下一个加1
select s.prod_id, sum(s.amount_sold),
rank() over (order by sum(s.amount_sold) desc) as rank,
dense_rank() over (order by sum(s.amount_sold) desc) as dense_rank
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
结果:
PROD_ID SUM(S.AMOUNT_SOLD) RANK DENSE_RANK
---------- ------------------ ---------- ----------
13 936197.53 7 7
14 2733887.43 2 2
15 1368317.88 5 5
16 11.99 60 60
17 2239127.88 3 3
18 5477218.04 1 1
19 182670.35 20 20
20 990525.95 6 6
21 1535187.44 4 4
22 31853.11 54 54
23 85211.28 36 36
24 163929.27 22 22
25 522713.71 13 13
26 567533.83 12 12
27 107968.24 30 30
28 644480.02 9 9
29 578374.62 11 11
30 59391.8 48 48
31 64464.83 45 45
32 124081.8 26 26
还可以通过 NULLS LAST 或 NULLS FIRST 控制null放在首位或末位
select s.prod_id, sum(s.amount_sold),
rank() over (order by sum(s.amount_sold) desc nulls last) as rank,
dense_rank() over (order by sum(s.amount_sold) desc nulls last) as dense_rank
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
与PARTITION BY 子句结合使用
select s.prod_id, s.month_id, sum(s.amount_sold),
rank() over (partition by s.month_id order by sum(s.amount_sold) desc) as rank,
dense_rank() over (partition by s.month_id order by sum(s.amount_sold) desc) as dense_rank
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id, month_id
order by s.prod_id, month_id;
结果:
PROD_ID MONTH_ID SUM(S.AMOUNT_SOLD) RANK DENSE_RANK
1 13 01 125575.64 6 6
2 13 02 122325.21 5 5
3 13 03 61649.5 6 6
4 13 04 17404.26 16 16
5 13 05 61649.5 6 6
6 13 06 20004 16 16
7 13 07 164719.38 4 4
8 13 08 125010.33 5 5
9 13 10 112205.27 6 6
10 13 12 125654.44 5 5
11 14 01 239773.24 3 3
12 14 02 278879.97 2 2
rank()、dense_rank() 等同样可以与rollup、cube、grouping sets 等函数合用
cume_dist 计算某个值相对于一组值中的位置,即 cumulative distribution (累积分布)。
percent_rank 某个值相对于一组值的百分比
select s.prod_id, sum(s.amount_sold),
cume_dist() over ( order by sum(s.amount_sold) desc) as cume_dist,
percent_rank() over ( order by sum(s.amount_sold) desc) as percent_rank
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
PROD_ID SUM(S.AMOUNT_SOLD) CUME_DIST PERCENT_RANK
1 13 936197.53 0.116666666666667 0.101694915254237
2 14 2733887.43 0.0333333333333333 0.0169491525423729
3 15 1368317.88 0.0833333333333333 0.0677966101694915
4 16 11.99 1 1
5 17 2239127.88 0.05 0.0338983050847458
6 18 5477218.04 0.0166666666666667 0
7 19 182670.35 0.333333333333333 0.322033898305085
8 20 990525.95 0.1 0.0847457627118644
9 21 1535187.44 0.0666666666666667 0.0508474576271186
10 22 31853.11 0.9 0.898305084745763
11 23 85211.28 0.6 0.593220338983051
12 24 163929.27 0.366666666666667 0.355932203389831
13 25 522713.71 0.216666666666667 0.203389830508475
14 26 567533.83 0.2 0.186440677966102
15 27 107968.24 0.5 0.491525423728814
16 28 644480.02 0.15 0.135593220338983
17 29 578374.62 0.183333333333333 0.169491525423729
18 30 59391.8 0.8 0.796610169491525
19 31 64464.83 0.75 0.745762711864407
20 32 124081.8 0.433333333333333 0.423728813559322
21 33 110987.48 0.483333333333333 0.474576271186441
22 34 106525.01 0.516666666666667 0.508474576271186
23 35 269009.61 0.283333333333333 0.271186440677966
24 36 131170.12 0.416666666666667 0.406779661016949
25 37 293152.28 0.266666666666667 0.254237288135593
26 38 61209.1 0.783333333333333 0.779661016949152
27 39 149108.82 0.4 0.389830508474576
28 40 412274.43 0.25 0.23728813559322
29 41 101928.66 0.533333333333333 0.525423728813559
30 42 96450.49 0.566666666666667 0.559322033898305
31 43 63514.58 0.766666666666667 0.76271186440678
32 44 54659.47 0.833333333333333 0.830508474576271
33 45 122265.54 0.45 0.440677966101695
34 46 73544.8 0.65 0.644067796610169
35 47 70288.37 0.716666666666667 0.711864406779661
36 48 69656.88 0.733333333333333 0.728813559322034
37 113 91540.88 0.583333333333333 0.576271186440678
38 114 71070.25 0.683333333333333 0.677966101694915
39 115 10505.43 0.933333333333333 0.932203389830508
40 116 56622.64 0.816666666666667 0.813559322033898
41 117 40338.17 0.85 0.847457627118644
42 118 226875.98 0.3 0.288135593220339
43 119 39241.12 0.866666666666667 0.864406779661017
44 120 27333.37 0.916666666666667 0.915254237288135
45 123 163865.43 0.383333333333333 0.372881355932203
46 124 624.82 0.983333333333333 0.983050847457627
47 125 117214.73 0.466666666666667 0.457627118644068
48 126 172907.76 0.35 0.338983050847458
49 127 611329.86 0.166666666666667 0.152542372881356
50 128 221494.36 0.316666666666667 0.305084745762712
51 129 496483.76 0.233333333333333 0.220338983050847
52 130 691798.97 0.133333333333333 0.11864406779661
53 131 70645.28 0.7 0.694915254237288
54 132 83353.36 0.616666666666667 0.610169491525424
55 133 76010 0.633333333333333 0.627118644067797
56 136 8989.83 0.95 0.949152542372881
57 140 97259.84 0.55 0.542372881355932
58 146 35771.66 0.883333333333333 0.88135593220339
59 147 1685.89 0.966666666666667 0.966101694915254
60 148 72058.92 0.666666666666667 0.661016949152542
(3) 分片NTILE
ntile(n), 划分为n个分片,查找属于哪个分片
select s.prod_id, sum(s.amount_sold),
ntile(4) over ( order by sum(s.amount_sold) desc) as ntile
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
结果:
PROD_ID SUM(S.AMOUNT_SOLD) NTILE
1 13 936197.53 1
2 14 2733887.43 1
3 15 1368317.88 1
4 16 11.99 4
5 17 2239127.88 1
6 18 5477218.04 1
7 19 182670.35 2
8 20 990525.95 1
9 21 1535187.44 1
10 22 31853.11 4
11 23 85211.28 3
12 24 163929.27 2
13 25 522713.71 1
14 26 567533.83 1
15 27 107968.24 2
16 28 644480.02 1
17 29 578374.62 1
18 30 59391.8 4
19 31 64464.83 3
20 32 124081.8 2
21 33 110987.48 2
22 34 106525.01 3
23 35 269009.61 2
24 36 131170.12 2
25 37 293152.28 2
26 38 61209.1 4
27 39 149108.82 2
28 40 412274.43 1
29 41 101928.66 3
30 42 96450.49 3
31 43 63514.58 4
32 44 54659.47 4
33 45 122265.54 2
34 46 73544.8 3
35 47 70288.37 3
36 48 69656.88 3
37 113 91540.88 3
38 114 71070.25 3
39 115 10505.43 4
40 116 56622.64 4
41 117 40338.17 4
42 118 226875.98 2
43 119 39241.12 4
44 120 27333.37 4
45 123 163865.43 2
46 124 624.82 4
47 125 117214.73 2
48 126 172907.76 2
49 127 611329.86 1
50 128 221494.36 2
51 129 496483.76 1
52 130 691798.97 1
53 131 70645.28 3
54 132 83353.36 3
55 133 76010 3
56 136 8989.83 4
57 140 97259.84 3
58 146 35771.66 4
59 147 1685.89 4
60 148 72058.92 3
row_number 从1开始,每个分组返回一个数字。
select s.prod_id, sum(s.amount_sold),
row_number() over ( order by sum(s.amount_sold) desc) as row_number
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
结果:
PROD_ID SUM(S.AMOUNT_SOLD) ROW_NUMBER
1 13 936197.53 7
2 14 2733887.43 2
3 15 1368317.88 5
4 16 11.99 60
5 17 2239127.88 3
6 18 5477218.04 1
7 19 182670.35 20
8 20 990525.95 6
9 21 1535187.44 4
10 22 31853.11 54
11 23 85211.28 36
12 24 163929.27 22
13 25 522713.71 13
14 26 567533.83 12
15 27 107968.24 30
16 28 644480.02 9
17 29 578374.62 11
18 30 59391.8 48
19 31 64464.83 45
20 32 124081.8 26
21 33 110987.48 29
22 34 106525.01 31
23 35 269009.61 17
24 36 131170.12 25
25 37 293152.28 16
26 38 61209.1 47
27 39 149108.82 24
28 40 412274.43 15
29 41 101928.66 32
30 42 96450.49 34
31 43 63514.58 46
32 44 54659.47 50
33 45 122265.54 27
34 46 73544.8 39
35 47 70288.37 43
36 48 69656.88 44
37 113 91540.88 35
38 114 71070.25 41
39 115 10505.43 56
40 116 56622.64 49
41 117 40338.17 51
42 118 226875.98 18
43 119 39241.12 52
44 120 27333.37 55
45 123 163865.43 23
46 124 624.82 59
47 125 117214.73 28
48 126 172907.76 21
49 127 611329.86 10
50 128 221494.36 19
51 129 496483.76 14
52 130 691798.97 8
53 131 70645.28 42
54 132 83353.36 37
55 133 76010 38
56 136 8989.83 57
57 140 97259.84 33
58 146 35771.66 53
59 147 1685.89 58
60 148 72058.92 40
2、反百分点函数
使用反百分比函数可以获得对应某个百分点的值。
percenttile_disc、percentile_cont 与 cume_dist、percent_rank 作用相反。
select percentile_cont(0.6) within group ( order by sum(s.amount_sold) desc) as percentile_cont,
percentile_disc(0.6) within group ( order by sum(s.amount_sold) desc) as percentile_disc
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
结果:
PERCENTILE_CONT PERCENTILE_DISC
1 84468.112 85211.28
3、窗口函数
(1)、累积和: rows between unbounded preceding and current row
计算某年1月到12月累计销量:
select s.month_id, sum(s.amount_sold),
sum(sum(s.amount_sold)) over
(order by s.month_id rows between unbounded preceding and current row)
as cumulative_amount
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
结果
MONTH_ID SUM(S.AMOUNT_SOLD) CUMULATIVE_AMOUNT
1 01 2277420.49 2277420.49
2 02 2372690.87 4650111.36
3 03 1830572.64 6480684
4 04 1975978.3 8456662.3
5 05 1748287.23 10204949.53
6 06 1869728.61 12074678.14
7 07 1932282.28 14006960.42
8 08 1972532.63 15979493.05
9 09 2167008.19 18146501.24
10 10 2236464.53 20382965.77
11 11 1959664.22 22342629.99
12 12 1741284.96 24083914.95
(2)、移动平均值 rows between N preceding and current row
某年当月与前三个月之间的移动平均值:
select s.month_id, sum(s.amount_sold),
avg(sum(s.amount_sold)) over
(order by s.month_id rows between 3 preceding and current row)
as moving_avg
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
结果:
1 01 2277420.49 2277420.49
2 02 2372690.87 2325055.68
3 03 1830572.64 2160228
4 04 1975978.3 2114165.575
5 05 1748287.23 1981882.26
6 06 1869728.61 1856141.695
7 07 1932282.28 1881569.105
8 08 1972532.63 1880707.6875
9 09 2167008.19 1985387.9275
10 10 2236464.53 2077071.9075
11 11 1959664.22 2083917.3925
12 12 1741284.96 2026105.475
(3)、中心平均值: between N preceding and N following
计算当月与前后1月的中心平均值
select s.month_id, sum(s.amount_sold),
avg(sum(s.amount_sold)) over
(order by s.month_id rows between 1 preceding and 1 following )
as moving_avg
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
MONTH_ID SUM(S.AMOUNT_SOLD) MOVING_AVG
1 01 2277420.49 2325055.68
2 02 2372690.87 2160228
3 03 1830572.64 2059747.27
4 04 1975978.3 1851612.72333333
5 05 1748287.23 1864664.71333333
6 06 1869728.61 1850099.37333333
7 07 1932282.28 1924847.84
8 08 1972532.63 2023941.03333333
9 09 2167008.19 2125335.11666667
10 10 2236464.53 2121045.64666667
11 11 1959664.22 1979137.90333333
12 12 1741284.96 1850474.59
(4)、FIRST_VALUE 、 LAST_VALUE
FIRST_VALUE 、 LAST_VALUE 获取窗口的首行、末行。
select s.month_id, sum(s.amount_sold),
first_value(sum(s.amount_sold)) over
(order by s.month_id rows between 1 preceding and 1 following )
as last_month_amount,
last_value(sum(s.amount_sold)) over
(order by s.month_id rows between 1 preceding and 1 following )
as next_month_amount
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
结果:
MONTH_ID SUM(S.AMOUNT_SOLD) LAST_MONTH_AMOUNT NEXT_MONTH_AMOUNT
1 01 2277420.49 2277420.49 2372690.87
2 02 2372690.87 2277420.49 1830572.64
3 03 1830572.64 2372690.87 1975978.3
4 04 1975978.3 1830572.64 1748287.23
5 05 1748287.23 1975978.3 1869728.61
6 06 1869728.61 1748287.23 1932282.28
7 07 1932282.28 1869728.61 1972532.63
8 08 1972532.63 1932282.28 2167008.19
9 09 2167008.19 1972532.63 2236464.53
10 10 2236464.53 2167008.19 1959664.22
11 11 1959664.22 2236464.53 1741284.96
12 12 1741284.96 1959664.22 1741284.96
4、报表函数
包括 sum、avg、max、min、count、variance、stddev、ratio_to_report等。
(1)、总计报表
计算每月销量总和以及所有产品销量总和
select s.prod_id, s.month_id,
sum(sum(s.amount_sold)) over (partition by s.month_id) as month_total,
sum(sum(s.amount_sold)) over (partition by s.prod_id) as prod_total
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id, s.month_id
order by s.prod_id, s.month_id;
, PROD_ID, MONTH_ID, MONTH_TOTAL, PROD_TOTAL
1 13 01 2277420.49 936197.53
2 13 02 2372690.87 936197.53
3 13 03 1830572.64 936197.53
4 13 04 1975978.3 936197.53
5 13 05 1748287.23 936197.53
6 13 06 1869728.61 936197.53
7 13 07 1932282.28 936197.53
8 13 08 1972532.63 936197.53
9 13 10 2236464.53 936197.53
10 13 12 1741284.96 936197.53
11 14 01 2277420.49 2733887.43
12 14 02 2372690.87 2733887.43
13 14 03 1830572.64 2733887.43
14 14 04 1975978.3 2733887.43
15 14 05 1748287.23 2733887.43
16 14 06 1869728.61 2733887.43
17 14 07 1932282.28 2733887.43
18 14 08 1972532.63 2733887.43
19 14 09 2167008.19 2733887.43
20 14 10 2236464.53 2733887.43
21 14 11 1959664.22 2733887.43
22 14 12 1741284.96 2733887.43
23 15 01 2277420.49 1368317.88
24 15 02 2372690.87 1368317.88
25 15 03 1830572.64 1368317.88
26 15 04 1975978.3 1368317.88
27 15 05 1748287.23 1368317.88
28 15 06 1869728.61 1368317.88
29 15 07 1932282.28 1368317.88
30 15 08 1972532.63 1368317.88
31 15 09 2167008.19 1368317.88
32 15 10 2236464.53 1368317.88
33 15 11 1959664.22 1368317.88
34 15 12 1741284.96 1368317.88
35 16 03 1830572.64 11.99
36 17 01 2277420.49 2239127.88
37 17 02 2372690.87 2239127.88
38 17 03 1830572.64 2239127.88
39 17 04 1975978.3 2239127.88
40 17 05 1748287.23 2239127.88
41 17 06 1869728.61 2239127.88
42 17 07 1932282.28 2239127.88
43 17 08 1972532.63 2239127.88
44 17 09 2167008.19 2239127.88
45 17 10 2236464.53 2239127.88
46 17 11 1959664.22 2239127.88
47 17 12 1741284.96 2239127.88
48 18 01 2277420.49 5477218.04
49 18 02 2372690.87 5477218.04
50 18 03 1830572.64 5477218.04
51 18 04 1975978.3 5477218.04
52 18 05 1748287.23 5477218.04
53 18 06 1869728.61 5477218.04
54 18 07 1932282.28 5477218.04
55 18 08 1972532.63 5477218.04
56 18 09 2167008.19 5477218.04
57 18 10 2236464.53 5477218.04
58 18 11 1959664.22 5477218.04
59 18 12 1741284.96 5477218.04
60 19 01 2277420.49 182670.35
(2)、RATIO_TO_REPORT 函数
RATIO_TO_REPORT 可用来计算某个值在一组值的总和中所占的比例。
计算每个产品每个月销量总和,以及该类型在整月中的比例。
select s.prod_id, s.month_id,
sum(s.amount_sold) as prod_month_total,
ratio_to_report(sum(s.amount_sold)) over (partition by s.month_id) as prod_ratio
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id, s.month_id
order by s.prod_id, s.month_id;
结果:
PROD_ID MONTH_ID PROD_MONTH_TOTAL PROD_RATIO
1 13 01 125575.64 0.0551394178419814
2 13 02 122325.21 0.0515554771785336
3 13 03 61649.5 0.0336777130024187
4 13 04 17404.26 0.00880792061329823
5 13 05 61649.5 0.0352627983217609
6 13 06 20004 0.0106988789137692
7 13 07 164719.38 0.085246023163862
8 13 08 125010.33 0.0633755447685547
9 13 10 112205.27 0.0501708247525839
10 13 12 125654.44 0.0721619050795684
11 14 01 239773.24 0.105282814944727
12 14 02 278879.97 0.117537422816483
5、LAG与LEAD函数
LAG 与 LEAD 函数获得位于距当前指定距离处那条记录中的数据。
如获得前一个月 与后一个月的数据:
select s.month_id, sum(s.amount_sold),
lag(sum(s.amount_sold)) over ( order by s.month_id) as last_month_sum,
lead(sum(s.amount_sold)) over ( order by s.month_id) as next_month_sum
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
结果:
MONTH_ID SUM(S.AMOUNT_SOLD) LAST_MONTH_SUM NEXT_MONTH_SUM
1 01 2277420.49 2372690.87
2 02 2372690.87 2277420.49 1830572.64
3 03 1830572.64 2372690.87 1975978.3
4 04 1975978.3 1830572.64 1748287.23
5 05 1748287.23 1975978.3 1869728.61
6 06 1869728.61 1748287.23 1932282.28
7 07 1932282.28 1869728.61 1972532.63
8 08 1972532.63 1932282.28 2167008.19
9 09 2167008.19 1972532.63 2236464.53
10 10 2236464.53 2167008.19 1959664.22
11 11 1959664.22 2236464.53 1741284.96
12 12 1741284.96 1959664.22
6、FIRST函数与LAST函数
first 和 last 返回排序分组中的第一个或者最后一个值。
如查询某年销量最高的、最低的月份。
select min(s.month_id) keep (dense_rank first order by sum(s.amount_sold)) as min_month,
max(s.month_id) keep (dense_rank last order by sum(s.amount_sold)) as max_month
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.month_id
order by s.month_id;
MIN_MONTHMAX_MONTH
1 12 02
7、线性回归函数
8、假设评级及分布函数
假象评级与分布函数可以计算一条新记录在表中的排名和百分比,与 rank(),dense_rank(), percent_rank(), cume_dist() 等连用。
例如,查找 sum(amount) 为20004 的假想排名和百分比排名
select rank(20004) within group ( order by sum(s.amount_sold) desc) as rank,
percent_rank(20004) within group ( order by sum(s.amount_sold) desc) as pencent_rank
from all_sales s where s.year=1998 and s.amount_sold is not null
group by s.prod_id
order by s.prod_id;
结果:
RANK PENCENT_RANK
1 56 0.916666666666667