row_number() over (partition by col1 order by col2)
表示根据col1分组,在分组内部根据 col2排序
而这个值就表示每组内部排序后的顺序编号(组内连续的唯一的)
--drop table delete_dobule_t;
create table delete_dobule_t
(
col_01 varchar2(4),
col_02 varchar2(4),
col_03 varchar2(4)
);
insert into delete_dobule_t
values ('A','1','test');
insert into delete_dobule_t
values ('A','2','test');
insert into delete_dobule_t
values ('A','3','test');
insert into delete_dobule_t
values ('A','4','test');
insert into delete_dobule_t
values ('B','1','test');
insert into delete_dobule_t
values ('B','2','test');
insert into delete_dobule_t
values ('B','3','test');
insert into delete_dobule_t
values ('C','1','test');
insert into delete_dobule_t
values ('C','2','test');
insert into delete_dobule_t
values ('C','3','test');
insert into delete_dobule_t
values ('C','4','test');
commit;
COL_01 COL_02 COL_03 A 1 test
A 2 test
A 3 test
A 4 test
B 1 test
B 2 test
B 3 test
C 1 test
C 2 test
C 3 test
C 4 test
我想有一个sql语句,搜索的结果是
A | 1 | test
A | 2 | test
B | 1 | test
B | 2 | test
C | 1 | test
C | 2 | test
实现:
select COL_01,COL_02,COL_03
from(select COL_01,COL_02,COL_03 ,row_number()over(partition by
COL_01 order by COL_02) id
from delete_dobule_t) where id<3;
---查询结果展示
select COL_01,COL_02,COL_03,id
from(select COL_01,COL_02,COL_03 ,row_number()over(partition by
COL_01 order by COL_02) id
from delete_dobule_t) ;
COL_01 COL_02 COL_03 ID
A 1 test 1
A 2 test 2
A 3 test 3
A 4 test 4
B 1 test 1
B 2 test 2
B 3 test 3
C 1 test 1
C 2 test 2
C 3 test 3
C 4 test 4
-----想要删除重复保留
A | 1 | test
B | 1 | test
C | 1 | test
-----语句
delete delete_dobule_t where rowid in(
select rowid
from(select rowid ,row_number()over(partition by COL_01 order by
COL_02) id
from delete_dobule_t)
where id<>1);
-----其他删除重复方法--------------------------
下面是查询重复数据的一个例子:
select a.rowid,a.* from 表名 a
where a.rowid !=
(
select max(b.rowid) from 表名 b
where a.字段1 = b.字段1 and
a.字段2 = b.字段2
)
下面我就来讲解一下,上面括号中的语句是查询出重复数据中rowid最大的一条记录。
而外面就是查询出除了rowid最大之外的其他重复的数据了。
由此,我们要删除重复数据,只保留最新的一条数据,就可以这样写了:
delete from 表名 a
where a.rowid !=
(
select max(b.rowid) from 表名 b
where a.字段1 = b.字段1 and
a.字段2 = b.字段2
)
随便说一下,上面语句的执行效率是很低的,可以考虑建立临时表,讲需要判断重复的字段、rowid插入临时表中,然后删除的时候在进行比较。
create table 临时表 as
select a.字段1,a.字段2,MAX(a.ROWID) dataid from 正式表 a GROUP BY
a.字段1,a.字段2;
delete from 表名 a
where a.rowid !=
(
select b.dataid from 临时表 b
where a.字段1 = b.字段1 and
a.字段2 = b.字段2
);
commit;