----需求是:图片数据删除
/*
条件是seedid=0的图片,每个资产
id每个图片类型的图片数量大于1的,
只保留创建时间最早的那一张。
*/
---创建备份表:--120028总数据
create table d_spider_image_20100706 as
select * from d_spider_image where seedid=0;
--- --20541重复数目 ---42289根据重复数据查询出来的结果
select count(*) from d_spider_image kk where kk.seedid=0
and(kk.schemaid,kk.typecode) in (
select j.schemaid,j.typecode from d_spider_image j
where j.seedid=0
group by j.schemaid,j.typecode
having count(*)>1)
order by kk.schemaid, kk.typecode,kk.createtime;
---创建个临时的重复数据表
create table d_spider_image_tmp as
select * from d_spider_image_20100706 kk where kk.seedid=0
and(kk.schemaid,kk.typecode) in (
select j.schemaid,j.typecode from d_spider_image_20100706 j
where j.seedid=0
group by j.schemaid,j.typecode
having count(*)>1)
-- 测试删除重复数据的方法
select * from d_spider_image_tmp kk
order by kk.schemaid, kk.typecode,kk.createtime;
---删除21748--- 等于20541 是正确的
--delete from d_spider_image_tmp kk
select count(*) from d_spider_image_tmp kk
where kk.seedid=0
and(kk.schemaid,kk.typecode) in (
select j.schemaid,j.typecode from d_spider_image_tmp j
where j.seedid=0
group by j.schemaid,j.typecode
having count(*)>1)
and rowid not in (select min (rowid) from d_spider_image_tmp where seedid=0
group by schemaid,typecode
having count(*)>1 )
select * from d_spider_image_tmp kk
order by kk.schemaid, kk.typecode,kk.createtime;
rollback;
--- 删除重复数据 21748
delete from d_spider_image kk
--select count(*) from d_spider_image kk
where kk.seedid=0
and(kk.schemaid,kk.typecode) in (
select j.schemaid,j.typecode from d_spider_image j
where j.seedid=0
group by j.schemaid,j.typecode
having count(*)>1)
and rowid not in (select min (rowid) from d_spider_image where seedid=0
group by schemaid,typecode
having count(*)>1 )
commit;
rollback;
--下面是如果刚才删除有误的话,需要从刚才备份的表中还原原来的数据步骤
--- 删除的数据有问题 现在通过备份表d_spider_image_20100706 还原数据
select count(*) from d_spider_image_20100706 --1605986
select count(*) from d_spider_image --1576571
-- 全部删除表里面的数据
truncate table d_spider_image
--通过insert into方法把备份表里面的数据库插入到原始表
insert /*+ append */ into d_spider_image select * from d_spider_image_20100706
commit;
select count(*) from d_spider_image --1605986
这里就OK拉。
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/15713890/viewspace-667345/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/15713890/viewspace-667345/