1、集合操作
--1.intersect
with t
as
(
select 1 as v,'abc' as vv from dual
union all
select 1 ,'abc' from dual
union all
select 2,'def' from dual
),
tt
as
(
select 1 as v,'abc' as vv from dual
union all
select 1,'abc' from dual
union all
select 3,'def' from dual
)
/*
只返回一条记录,说明求交集后,会去重。
从执行计划中看,会首先会对第一个表有一个sort操作,选项是unique,也就是去重。
然后第二个表,也是一样。
也就是先把每个表的记录进行排序去重,然后再求交集
V VV
---------- ---
1 abc
*/
select v,vv from t
intersect
select v,vv from tt
--2.minus
with t
as
(
select 1 as v,'abc' as vv from dual
union all
select 1 ,'abc' from dual
union all
select 2,'def' from dual
),
tt
as
(
select 1 as v,'abc' as vv from dual
union all
select 3,'def' from dual
)
/*
从执行计划中看,会首先会对第一个表有一个sort操作,选项是unique,也就是去重。
然后第二个表,也是一样。
然后第一个结果集减去第二个结果集,所以只会返回一条记录。
V VV
---------- ---
2 def
*/
select v,vv from t
minus
select v,vv from tt
2、decode函数、translate函数
select v,
--decode函数类似于case when,可有多个参数
decode(v,
1,1,
2,2,
3,3
),
--translate函数类似于replace,不过是加强版,按照替换规则进行替换
translate(vv, --要替换的字符串
'abcdefghi', --被替换的字符
'123456789') --替换为的字符
from
(
select 1 as v,'abc' as vv from dual
union all
select 2 as v,'def' as vv from dual
union all
select 3 as v,'ghi' as vv from dual
)
3、层次化查询
--1.层次查询:自顶向下、自底向上
--自顶向下
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
/*
LPAD('',2*LEVEL-1)||VV
------------------------------------
01
02
03
04
05
06
07
08
09
10
11
12
13
*/
select lpad(' ',2 * level - 1) || vv
from t
start with v = 1
connect by prior v = parent_v;
--自底向上
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
/*
LPAD('',2*LEVEL-1)||VV
----------------------------------------------------------------
09
08
05
01
*/
--注意,level是伪劣,第一层恢复返回1
select lpad(' ',2 * level - 1) || vv
from t
start with v = 9
connect by v = prior parent_v; --connect by prior parent_v = v 效果是一样的
--2.过滤
--通过start with的条件,从非根结点开始遍历
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
select lpad(' ',2 * level - 1) || vv
from t
start with v = 5 --这个查询条件只是过滤整个查询的起点,也就是从哪个节点开始遍历
--如果没有这个过滤条件,那么会尝试从每个值
connect by prior v = parent_v;
--在start with的条件中使用子查询
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
/*
LPAD('',2*LEVEL-1)||VV
-----------------------------
05
06
07
08
09
*/
select lpad(' ', 2 * level - 1) || vv
from t
start with v = (select v from t where vv = '05')
connect by prior v = parent_v;
--通过connect by条件删除分支
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
/*
LPAD('',2*LEVEL-1)||VV
-------------------------
05
08
09
*/
select lpad(' ', 2 * level - 1) || vv
from t
start with v = 5
connect by prior v = parent_v
and vv != '06' --会删除分支6,也即是删除节点6,及其子节点7
--这个条件在查询中间过程中进行过滤,会直接删除分支,而不是某个节点
--通过where条件,删除节点
with t
as
(
select 1 as v,null parent_v,'01' vv from dual
union all
select 2 as v,1,'02' as vv from dual
union all
select 3 as v,2,'03' as vv from dual
union all
select 4 as v,2,'04' as vv from dual
union all
select 5 as v,1,'05' as vv from dual
union all
select 6 as v,5,'06' as vv from dual
union all
select 7 as v,6,'07' as vv from dual
union all
select 8 as v,5,'08' as vv from dual
union all
select 9 as v,8,'09' as vv from dual
union all
select 10 as v,1,'10' as vv from dual
union all
select 11 as v,10,'11' as vv from dual
union all
select 12 as v,10,'12' as vv from dual
union all
select 13 as v,10,'13' as vv from dual
)
/*
LPAD('',2*LEVEL-1)||VV
---------------------------
05
07
08
09
*/
select lpad(' ', 2 * level - 1) || vv
from t
where vv != '06' --会删除节点6,但其子节点7还是存在的,where只是对查询的结果进行过滤,所以还是会返回节点7
start with v = 5
connect by prior v = parent_v
4、rollup、cube子句、grouping sets、grouping函数、grouping_id函数、group_id函数
--1.rollup
with t
as
(
select 1 as v,'abc' as vv,1 as vvv from dual
union all
select 2 as v,'def' as vv,2 as vvv from dual
union all
select 3 as v,'ghi' as vv,3 as vvv from dual
)
/*
V VV SUM(VVV)
-------------------- -------------------- ----------
1 abc 1
1 小计 1
2 def 2
2 小计 2
3 ghi 3
3 小计 3
总计 总计 6
*/
--需要特别注意rollup中的字段的顺序,不同顺序会有不同的结果
--grouping函数,返回1表示这个值是group by产生的,不是表本身的数据,返回0表示是本身的数据
select case when grouping(v) = 1
and grouping(vv) = 1
then '总计'
else cast(v as varchar2(20))
end as v,
case when grouping(v) = 0
and grouping(vv) = 1
then '小计'
when grouping(v) = 1
and grouping(vv) = 1
then '总计'
else cast(vv as varchar2(20))
end as vv,
sum(vvv)
from t
group by rollup(v,vv);
--2.cube
with t
as
(
select 1 as v,'abc' as vv,1 as vvv from dual
union all
select 2 as v,'def' as vv,2 as vvv from dual
union all
select 3 as v,'ghi' as vv,3 as vvv from dual
)
/*
V VV SUM(VVV)
-------------------- -------------------- ----------
1 abc 1
1 v-小计 1
2 def 2
2 v-小计 2
3 ghi 3
3 v-小计 3
vv-小计 abc 1
vv-小计 def 2
vv-小计 ghi 3
总计 总计 6
*/
--cube中的字段不讲究顺序,结果是一样的
--grouping_id(col1,col2,col3...,coln)函数,当n列是group by产生,也就是null时返回1
--其实grouping_id就是grouping的一个加强版
select case when grouping_id(v,vv) in (0,1) --0表示v与vv都非空,1表示vv列空而v列非空
then cast(v as varchar(20))
when grouping_id(v,vv) = 2 --2表示v列为空,vv为非空
then 'vv-小计'
when grouping_id(v,vv) = 3 --v与vv列都为空
then '总计'
end as v,
case when grouping_id(v,vv) in (0,2)
then cast(vv as varchar(20))
when grouping_id(v,vv) = 1
then 'v-小计'
when grouping_id(v,vv) = 3
then '总计'
end as vv,
sum(vvv)
from t
group by cube(v,vv)
order by v,vv;
--3.grouping sets子句,只返回小计
with t
as
(
select 1 as v,'abc' as vv,1 as vvv from dual
union all
select 2 as v,'def' as vv,2 as vvv from dual
union all
select 3 as v,'ghi' as vv,3 as vvv from dual
)
/*
V VV SUM(VVV)
---------- --- ----------
1 1
2 2
3 3
abc 1
def 2
ghi 3
*/
/*
其实就相当于:
group by v
union all
group by vv
*/
select v,
vv,
sum(vvv)
from t
group by grouping sets(v,vv)
order by v,vv;
--4.group_id函数,没有参数,如果某个分组重复出现n次,那么会返回从0到n-1之间的整数。
with t
as
(
select 1 as v,'abc' as vv,1 as vvv from dual
union all
select 2 as v,'def' as vv,2 as vvv from dual
union all
select 3 as v,'ghi' as vv,3 as vvv from dual
)
/*
V VV GROUPID SUM(VVV)
---------- --- ---------- ----------
1 abc 0 1
1 1 1
1 0 1
2 def 0 2
2 0 2
2 1 2
3 ghi 0 3
3 0 3
3 1 3
*/
/*
这里其实就是把v和rollup(v,vv)进行group by,但会产生重复值,也就是:
group by : v,(v,vv)
v,(v,null) => v,null => group_id()返回0
v,(null,null) => v,null => group_id()返回1
所以会产生2个v,null。
*/
select v,
vv,
group_id() as groupID,
sum(vvv)
from t
group by v,rollup(v,vv)
order by v,vv;
5、分析函数
create table t(
year int not null,
month int not null,
type_name varchar2(20) not null,
emp_id int not null,
amount number(10,2)
);
delete from t;
insert into t(year,month,type_name,emp_id,amount)
select *
from
(
select 2013 as v1, 1 as v2, 'Book' as v3, 1 as v4, 100 as v5 from dual
union all
select 2013, 2, 'Book', 1, 200 from dual
union all
select 2013, 3, 'Book', 1, 300 from dual
union all
select 2013, 4, 'Book', 1, 400 from dual
union all
select 2013, 5, 'Book', 1, 500 from dual
union all
select 2013, 6, 'Book', 1, 600 from dual
union all
select 2013, 7, 'Book', 1, 700 from dual
union all
select 2013, 8, 'Book', 1, 800 from dual
union all
select 2013, 9, 'Book', 1, 900 from dual
union all
select 2013, 10, 'Book', 1, 1000 from dual
union all
select 2013, 11, 'Book', 1, null from dual
union all
select 2013, 12, 'Book', 1, 1000 from dual
union all
select 2013, 1, 'Magazine', 1, 100 from dual
union all
select 2013, 2, 'Magazine', 2, 200 from dual
union all
select 2013, 3, 'Magazine', 1, 300 from dual
union all
select 2013, 4, 'Magazine', 2, 400 from dual
union all
select 2013, 5, 'Magazine', 1, 500 from dual
union all
select 2013, 6, 'Magazine', 2, 600 from dual
union all
select 2013, 7, 'Magazine', 1, 700 from dual
union all
select 2013, 8, 'Magazine', 2, 800 from dual
union all
select 2013, 9, 'Magazine', 1, null from dual
union all
select 2013, 10, 'Magazine', 2, 1000 from dual
union all
select 2013, 11, 'Magazine', 2, null from dual
union all
select 2013, 12, 'Magazine', 1, 800 from dual
);
commit;
--1.排名函数
--1.1在降序排列时,默认会把空值排到第一,而在升序排列时会把空值排到最后
select year,
month,
amount,
--在值相等的情况下,名次会留下空位
rank() over(order by amount desc) as rank,
--不会留有空位,是密集的
dense_rank() over(order by amount desc) as dense_rank,
--行号,类似于rownum伪列,但按照某个字段排序后再编号
row_number() over(order by amount desc) as row_number,
--按照返回的记录显示行号,与row_number显示的行号不同
rownum,
--按照字段排序,根据记录条数/分片数=12/4 = 3,也就是每个片有3条记录,一共4片
ntile(4) over(order by amount desc) as ntile
from t
where type_name = 'Magazine';
--1.2通过nulls first和nulls last来空值null在排序时,显示的位置
select year,
month,
amount,
emp_id,
--在值相等的情况下,名次会留下空位
rank() over(order by amount desc nulls last) as rank,
--不会留有空位,是密集的
dense_rank() over(order by amount desc nulls last) as dense_rank,
--行号,类似于rownum伪列,但按照某个字段排序后再编号
row_number() over(order by amount desc nulls last) as row_number,
--按照返回的记录显示行号,与row_number显示的行号不同
rownum,
--按照emp_id分组,这里每组有6条记录,再按字段排序,
--根据记录条数/分片数=6/4 = 1.5,前两个分片每片有2条记录,后2个分片每个1条记录
ntile(5) over(partition by emp_id
order by amount desc nulls last
) as ntile
from t
where type_name = 'Magazine'
order by emp_id,ntile;
--2.百分点函数、反百分点函数、假想评级分布函数
--百分点函数
select v,
--rank排名
rank() over(order by v desc) as rank,
--rank排名号/总的记录数
--如果有多个值相同,那么取相同值中最大的row_number/总的记录数
cume_dist() over(order by v desc) as cume_dist,
--(rank排名号-1)/(总的记录数 - 1)
--如果有多个值相同,那么取相同值中最小的row_number/总的记录数
percent_rank() over(order by v desc) as percent_rank
from
(
select 600 as v from dual
union all
select 400 as v from dual
union all
select 100 as v from dual
union all
select 300 as v from dual
union all
select 300 as v from dual
)
--反百分点函数、假想评级分布函数
select --反百分点函数
--在每个分组中检查累积分布的数值,直到找到大于或等于参数的值,与percent_disc相反
percentile_disc(0.8) within group (order by v desc) as percentile_disc,
--在每个分组中检查百分比排名的值,直到找到大于或等于参数的值,与percent_rank相反
percentile_cont(0.5) within group (order by v desc) as percentile_count,
--假想评级分布函数
--假设v的值为350,那么返回rank排名
rank(350) within group (order by v desc) as rank,
--假设v的值为350,那么返回percent_rank的百分比
percent_rank(350) within group (order by v desc) as percent_rank
from
(
select 600 as v from dual
union all
select 400 as v from dual
union all
select 100 as v from dual
union all
select 300 as v from dual
union all
select 300 as v from dual
)
--3.窗口函数
--下面用的是sum,但avg、count、max、min等函数都适用
select year,
month,
sum(amount) as amount,
--计算累积和
sum(sum(amount)) over(order by month
rows between unbounded preceding and current row) as cumulative_amount,
--计算移动累积和,本月与前3个月销量和
sum(sum(amount)) over(order by month
rows between 3 preceding and current row) as moving_amout,
--计算中心累积和,也就是本月、前一个月、后一个月的销量总和
sum(sum(amount)) over(order by month
rows between 1 preceding and 1 following) as moving_center_amount,
--获取窗口的第一条记录
first_value(sum(amount)) over(order by month
rows between 1 preceding and 1 following) as first_value_amount,
--获取窗口的最后一条记录
last_value(sum(amount)) over(order by month
rows between 1 preceding and 1 following) as last_value_amount,
--当前记录的向前第1条记录,如果前面没有记录,那么返回null
lag(sum(amount),1) over(order by month) as lag_amount,
--当前记录的下一条记录,如果后面没有记录,返回null
lead(sum(amount),1) over(order by month) as lead_amount,
--对null值的不同处理,默认是respect nulls,表示把null正常处理
lag(sum(amount),1) respect nulls over(order by month) as respect_nulls,
--ignore nulls表示忽略null,比如要找前一条记录,如果前一条记录是null,那么会跳过这条记录再往前找一条
lag(sum(amount),1) ignore nulls over(order by month) as ignore_nulls
from t
group by year,month
order by year,month;
--4.报表函数
--下面用到了sum,也适合avg,max,min,count
select month,
type_name,
sum(amount) as amout,
--对group by之后的结果,再次对结果进行group by month求sum
sum(sum(amount)) over(partition by month) as month_amount,
--对group by之后的结果,再次对结果进行group by type_name求sum
sum(sum(amount)) over(partition by type_name) as type_name_amount,
--计算某个月某个品类的amount/某个月不分品类的amount
ratio_to_report(sum(amount)) over(partition by month) as ratio_to_report
from t
group by month,type_name
order by month,type_name;
--5.first函数、last函数
--适用于min、max、count、sum、avg
select
--按照amount的和排序,求dense_rank,取排第1的,然后求最小的month
min(month) keep (dense_rank first order by sum(amount)) as first_amount,
max(month) keep (dense_rank last order by sum(amount)) as last_amount
from t
group by month
order by month;
6、model子句、povit与unpovit
--1.位置标记访问
select *
from t
model
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
sales_amount[1,2014] = sales_amount[1,2013],
sales_amount[2,2014] = sales_amount[2,2013] + sales_amount[3,2013],
sales_amount[3,2014] = round(sales_amount[3,2013] * 1.5, 2)
)
order by type_name,year,month;
--2.符号标记访问
--注意位置必须要对齐,
select type_name,year,month,sales_amount
from t
model
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
sales_amount[month = 1,year = 2014] = sales_amount[month = 1,year = 2014],
sales_amount[month = 1,year = 2014] = sales_amount[month = 1,year = 2014],
sales_amount[month = 1,year = 2014] = round(sales_amount[month = 1,year = 2014] * 1.5,2)
)
order by type_name,year,month;
--3.1 between and
select *
from t
model
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
--这里不太清楚为什么如果采用标记访问,会导致不会产生新的1,2014的记录
--sales_amount[month =1,year =2014] = sum(sales_amount)[month between 1 and 3,2013]
--必须要用sum函数,否则会报错
sales_amount[1,2014] = sum(sales_amount)[month between 1 and 3,2013]
)
order by type_name,year,month;
--3.2通过any和is any,表示任何维度的数据
select *
from t
model
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
--这里不太清楚为什么如果采用标记访问,会导致不会产生新的1,2014的记录
--sales_amount[month =1,year =2014] = sum(sales_amount)[month between 1 and 3,2013]
--必须要用sum
sales_amount[1,2014] = sum(sales_amount)[any,year is any]
)
order by type_name,year,month;
--3.3 for循环
--currentv()函数来访问当前的维度
--is present检查单元格是否存在
--is not null检查是否是null值
select *
from t
model
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
sales_amount[for month from 10 to 12 increment 1,2014] =
case when sales_amount[currentv(),2013] is present and
sales_amount[currentv(),2013] is not null
then sales_amount[currentv(),2013]
else 0
end
)
order by type_name,year,month;
--3.3 ignore nav和keep nav
select *
from t
model ignore nav --忽略null,自动返回0
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
(
sales_amount[for month from 10 to 12 increment 1,2014] =
case when sales_amount[currentv(),2013] is present
then sales_amount[currentv(),2013]
else 0
end
)
order by type_name,year,month;
--3.4 rules update更新已经存在的值,如果不存在,不会创建,也就是只会更新
select *
from t
model ignore nav --忽略null,自动返回0
partition by (type_name)
dimension by (month,year)
measures (amount sales_amount)
rules update --返回结果集中,不会包含2014年的数据
(
sales_amount[for month from 10 to 12 increment 1,2014] =
case when sales_amount[currentv(),2013] is present
then sales_amount[currentv(),2013]
else 0
end
)
order by type_name,year,month;
--4.pivot
--4.1 单列转置
select *
from
(
select type_name,
year,
month,
amount
from t
)
pivot
(
--虽然有12个月的数据,但这里只需要1-4月份的数据
sum(amount) for month in (1 as jan,2 as feb,3 as mar,4 apr)
)
order by type_name,year;
--4.2 多列转置
select *
from
(
select type_name,year,month,amount
from t
)
pivot
(
sum(amount) for (month,type_name) in ( (1,'Book') as jan_book,
(2,'Book') as feb_book,
(3,'Book') as mar_book
)
);
--5. unpivot
SELECT *
FROM
(
select *
from
(
select type_name,
year,
month,
amount
from t
)
pivot
(
--虽然有12个月的数据,但这里只需要1-4月份的数据
sum(amount) for month in (1 as jan,2 as feb,3 as mar,4 apr)
)
)
unpivot
(
amount for month in (JAN as 1,FEB as 2,MAR as 3,APR as 4)
)