现有一张表 dmp_result 无主键无索引,DATA_RECORD_ID、ORGANIZATION_ID、DATA_TIME理论上三个唯一,优化此表构建上面3列联合唯一约束,需要删除重复数据;
dmp_result 约数据400W 大约有重复数据3W条
查询表重复数据也就是待删除数据
select * from dmp_result a
where (a.DATA_RECORD_ID,a.ORGANIZATION_ID,a.DATA_TIME) in (select DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME from dmp_result group by DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME having
count(*) > 1)
and rowid not in (select min(rowid) from dmp_result group by DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME having count(*)>1)
如果直接执行上述sql delete 执行时间会非常漫长不可取;
可复制两张临时表,一张插入需要删除数据,一张取其差集就是想得到的数据(临时表无约束小心重复导入)
创建临时表dmp_result1、dmp_result2
create table dmp_result1 as select * from dmp_result
create table dmp_result2 as select * from dmp_result
减少日志产生
alter table dmp_result1 nologging;
alter table dmp_result2 nologging;
创建临时表dmp_result1插入需要删除数据
insert /*+ append */ into dmp_result1 (
select * from dmp_result a
where (a.DATA_RECORD_ID,a.ORGANIZATION_ID,a.DATA_TIME) in (select DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME from dmp_result group by DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME having
count(*) > 1)
and rowid not in (select min(rowid) from dmp_result group by DATA_RECORD_ID,ORGANIZATION_ID,DATA_TIME having count(*)>1)
);
dmp_result2插入差集
insert /*+ append */ into dmp_result2 (
select * from dmp_result
minus select * from dmp_result1
);
后面可以将dmp_result2 更换成 dmp_result,也可以将dmp_result清空后插入dmp_result2的数据即可
重新构建约束
alter table dmp_result rename to dmp_result3;
alter table dmp_result2 rename to dmp_result;
截断表有风险请做好备份
truncate table ems_inspect_record;
insert /*+ append */ into ems_inspect_record (
select * from ems_inspect_record2
);