一:大批量删除数据的脚本
说明:
有两张表a表(2000w数据),b表(30w数据)
b.id肯定能在a.id中找到
删掉a表中b.id和a.id不能匹配的数据
如b.id有1,2,3,4
a.id有1,2,3,4,5,6,7,8
把5,6,7,8在a表中删掉
create or replace procedure DelBatchData(LimitRows number) is
  -- Deletes up to LimitRows rows from A whose id has no match in B.
  -- One call processes a single batch; invoke repeatedly until SQL%ROWCOUNT
  -- (or a re-run of the NOT EXISTS query) shows nothing left to delete.
  c sys_refcursor;
  -- Native ROWID collection: avoids the implicit ROWID <-> VARCHAR2
  -- conversion the original varchar2(30) table forced on every row.
  type t_rowid_tab is table of rowid index by pls_integer;
  var_rowid t_rowid_tab;
begin
  open c for
    select rowid
      from a s1
     where not exists (select 1 from b s2 where s2.id = s1.id);
  -- Single bulk fetch caps the batch at LimitRows rows (bounds undo usage).
  fetch c bulk collect into var_rowid limit LimitRows;
  close c;

  forall i in 1 .. var_rowid.count
    delete from a where rowid = var_rowid(i);
  commit;
exception
  when others then
    -- Fix: original leaked the ref cursor if the fetch raised after OPEN.
    if c%isopen then
      close c;
    end if;
    rollback;
    raise_application_error(-20001, sqlerrm);
end;
二:大批量更新数据时(zt)。
详见http://www.itpub.net/thread-1052077-1-3.html
说明:
要更新的表:T1 (id1 number, id2 number, curr_count number,.....) --id1唯一 5亿条记录 >60GB
更新数据来源:T2 (id2 number, curr_count number) --id2唯一 4.8亿
更新逻辑:T2中的每一条记录,都到T1中找到对应的记录(T2.id2=T1.id2),更新T1.curr_count=T2.curr_count
限制条件:只能在线更新(应用程序一直在访问这个表,所以不能用INSERT SELECT)
alter table T1 storage(buffer_pool keep); -- keep buffer pool size = 6GB
declare
  -- Online batch update: copy T2.curr_count into T1.curr_count for every
  -- matching id2, updating in T1.rowid order so writes to the same block
  -- cluster together (the speed-up noted at the end of this file).
  -- Rewritten per note 2 below: BULK COLLECT + FORALL instead of the
  -- original row-by-row loop. id2 was fetched but never used, so it is
  -- dropped from the select list.
  cursor cur_t2 is
    select /*+ use_hash(T1,T2) parallel(T1,16) parallel_index(IX_T1_id2,16) */
           T2.curr_count, T1.rowid row_id
      from T1, T2
     where T1.id2 = T2.id2
     order by T1.rowid;

  type t_num_tab   is table of number index by pls_integer;
  type t_rowid_tab is table of rowid  index by pls_integer;
  v_counts t_num_tab;
  v_rowids t_rowid_tab;
  c_batch  constant pls_integer := 1000;  -- same commit cadence as original
begin
  open cur_t2;
  loop
    fetch cur_t2 bulk collect into v_counts, v_rowids limit c_batch;
    exit when v_rowids.count = 0;
    forall i in 1 .. v_rowids.count
      update T1
         set curr_count = v_counts(i)
       where rowid = v_rowids(i);
    -- NOTE(review): committing while cur_t2 is still open is fetch-across-
    -- commit — same ORA-01555 exposure as the original; size undo/retention
    -- for the full run.
    commit;
  end loop;
  close cur_t2;
  commit;
end;
/
alter table T1 storage(buffer_pool default);
注:
1. 这里速度加快的主要原因是避免了重复读取block
2. 进一步加快读取可以用bulk collect
说明:
有两张表a表(2000w数据),b表(30w数据)
b.id肯定能在a.id中找到
删掉a表中b.id和a.id不能匹配的数据
如b.id有1,2,3,4
a.id有1,2,3,4,5,6,7,8
把5,6,7,8在a表中删掉
create or replace procedure DelBatchData(LimitRows number) is
  -- Deletes up to LimitRows rows from A whose id has no match in B.
  -- One call processes a single batch; invoke repeatedly until SQL%ROWCOUNT
  -- (or a re-run of the NOT EXISTS query) shows nothing left to delete.
  c sys_refcursor;
  -- Native ROWID collection: avoids the implicit ROWID <-> VARCHAR2
  -- conversion the original varchar2(30) table forced on every row.
  type t_rowid_tab is table of rowid index by pls_integer;
  var_rowid t_rowid_tab;
begin
  open c for
    select rowid
      from a s1
     where not exists (select 1 from b s2 where s2.id = s1.id);
  -- Single bulk fetch caps the batch at LimitRows rows (bounds undo usage).
  fetch c bulk collect into var_rowid limit LimitRows;
  close c;

  forall i in 1 .. var_rowid.count
    delete from a where rowid = var_rowid(i);
  commit;
exception
  when others then
    -- Fix: original leaked the ref cursor if the fetch raised after OPEN.
    if c%isopen then
      close c;
    end if;
    rollback;
    raise_application_error(-20001, sqlerrm);
end;
二:大批量更新数据时(zt)。
详见http://www.itpub.net/thread-1052077-1-3.html
说明:
要更新的表:T1 (id1 number, id2 number, curr_count number,.....) --id1唯一 5亿条记录 >60GB
更新数据来源:T2 (id2 number, curr_count number) --id2唯一 4.8亿
更新逻辑:T2中的每一条记录,都到T1中找到对应的记录(T2.id2=T1.id2),更新T1.curr_count=T2.curr_count
限制条件:只能在线更新(应用程序一直在访问这个表,所以不能用INSERT SELECT)
alter table T1 storage(buffer_pool keep); -- keep buffer pool size = 6GB
declare
  -- Online batch update: copy T2.curr_count into T1.curr_count for every
  -- matching id2, updating in T1.rowid order so writes to the same block
  -- cluster together (the speed-up noted at the end of this file).
  -- Rewritten per note 2 below: BULK COLLECT + FORALL instead of the
  -- original row-by-row loop. id2 was fetched but never used, so it is
  -- dropped from the select list.
  cursor cur_t2 is
    select /*+ use_hash(T1,T2) parallel(T1,16) parallel_index(IX_T1_id2,16) */
           T2.curr_count, T1.rowid row_id
      from T1, T2
     where T1.id2 = T2.id2
     order by T1.rowid;

  type t_num_tab   is table of number index by pls_integer;
  type t_rowid_tab is table of rowid  index by pls_integer;
  v_counts t_num_tab;
  v_rowids t_rowid_tab;
  c_batch  constant pls_integer := 1000;  -- same commit cadence as original
begin
  open cur_t2;
  loop
    fetch cur_t2 bulk collect into v_counts, v_rowids limit c_batch;
    exit when v_rowids.count = 0;
    forall i in 1 .. v_rowids.count
      update T1
         set curr_count = v_counts(i)
       where rowid = v_rowids(i);
    -- NOTE(review): committing while cur_t2 is still open is fetch-across-
    -- commit — same ORA-01555 exposure as the original; size undo/retention
    -- for the full run.
    commit;
  end loop;
  close cur_t2;
  commit;
end;
/
alter table T1 storage(buffer_pool default);
注:
1. 这里速度加快的主要原因是避免了重复读取block
2. 进一步加快读取可以用bulk collect