oracle基于3种方法的大数据量插入更新

最新推荐文章于 2024-04-15 15:18:02 发布

利物浦希莫斯

最新推荐文章于 2024-04-15 15:18:02 发布

阅读量1.6w

点赞数 9

分类专栏： oracle 文章标签： oracle sql

本文链接：https://blog.csdn.net/u012431409/article/details/52001925

版权

oracle 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

过程插入更新的3种方法：

a、逐条检查插入或更新，同时执行插入或更新

b、逐条merge into（逐条是为了记录过程日志与错误信息）

c、基于关联数组的检查插入、更新，通过forall批量sql执行

以下为模拟步骤：

1、创建模拟大表，数据量1亿不分区

create table big_table as 
SELECT ROWNUM RN,'A' A,'B' B,'C' C,'D' D FROM    
      ( SELECT ROWNUM RN FROM DUAL CONNECT BY ROWNUM <= 1000) T1，
      ( SELECT ROWNUM RN FROM DUAL CONNECT BY ROWNUM <= 1000) T2，
      ( SELECT ROWNUM RN FROM DUAL CONNECT BY ROWNUM <= 100) T3;

说明：用connect by方法创建测试大表相对较快

2、给大表加上一个主键，因为插入更新基于主键

alter table BIG_TABLE2
  add constraint PK_RN2 primary key (RN)

3、创建模拟的更新表，数据量20万

CREATE TABLE UP_TABLE ( RN NUMBER,A VARCHAR2(20),B VARCHAR2(20),C VARCHAR2(20),D VARCHAR2(20))

4、给更新表加数据，10万更新，10万插入，写的相对烦人些。。。代码不贴出来了

5、日志表结构

-- Create table
create table INS_UP_LOG
(
  type VARCHAR2(50),
  time NUMBER,
  num  NUMBER,
  up   NUMBER,
  ins  NUMBER
)

6、第一种方法的过程如下：

create or replace procedure sp_ins_up is
  v_bz    char(1);--插入更新标志
  v_stime pls_integer;--过程起始时间
  i       pls_integer := 0; --执行数量
  v_up    pls_integer := 0;--更新数量
  v_ins   pls_integer := 0;--插入数量
  v_time  pls_integer;--循环当前循环起始时间
begin
  v_stime := dbms_utility.get_time();--过程起始时间
  v_time  := dbms_utility.get_time();--当前循环起始时间
  for rec in (select * from up_table) loop--开始循环
    i := i + 1;
    begin--当前数据执行插入更行判断
      select '1' into v_bz from big_table1 a where a.rn = rec.rn;
    exception
      when no_data_found then
        v_bz := 0;
    end;
    if v_bz = 1 then--更新
      v_up := v_up + 1;
      update big_table1 a
         set a.a = rec.a, a.b = rec.b, a.c = rec.c, a.d = rec.d
         where a.rn = rec.rn;
    else--插入
      v_ins := v_ins + 1;
      insert into big_table1 values (rec.rn, rec.a, rec.b, rec.c, rec.d);
    end if;
    if mod(i, 5000) = 0 then--一定数量提交
      insert into INS_UP_LOG
      values
        ('sp_ins_up',
         round((dbms_utility.get_time - v_time) / 100, 2),
         v_up + v_ins,
         v_up,
         v_ins);
      commit;
      v_up   := 0;
      v_ins  := 0;
      v_time := dbms_utility.get_time;
    end if;
  end loop;
  commit;
  insert into ins_up_log--过程总时间记录
  values
    ('sp_ins_up',
     round((dbms_utility.get_time - v_stime) / 100, 2),
     '',
     '',
     '');
  commit;
end sp_ins_up;

7、merge into方法过程如下：

create or replace procedure sp_merge is
  v_stime  pls_integer; --过程起始时间
  i        pls_integer := 0; --执行数量
  v_time   pls_integer; --循环当前循环起始时间
  v_rn     up_table.rn%type; --缓存值
  v_errmsg varchar2(500); --当前错误信息
begin
  v_stime := dbms_utility.get_time();
  v_time  := dbms_utility.get_time();
  for rec in (select * from up_table) loop
    i    := i + 1;
    v_rn := rec.rn; --缓存当前主键
    merge into big_table3 t --merge into
    using (select * from up_table where rn = rec.rn) a
    on (t.rn = a.rn)
    when matched then
      update set t.a = a.a, t.b = a.b, t.c = a.c, t.d = a.d
    when not matched then
      insert values (a.rn, a.a, a.b, a.c, a.d);
    v_errmsg := sqlerrm;
    if mod(i, 5000) = 0 then
      insert into ins_up_log
      values
        ('sp_merge',
         round((dbms_utility.get_time - v_time) / 100, 2),
         i,
         i / 2, --此处插入数量不必在意
         i / 2);
      commit;
      v_time := dbms_utility.get_time();
    end if;
  end loop;
  insert into ins_up_log --过程总时间记录
  values
    ('sp_merge',
     round((dbms_utility.get_time - v_stime) / 100, 2),
     i,
     '',
     '');
  commit;
exception
  when others then
    insert into ins_up_err values ('sp_merge', v_rn, v_errmsg);
end sp_merge;

8、forall方法，结合第一种方法的判断方式：

create or replace procedure sp_forall_new is
  type table_type is table of up_table%rowtype index by pls_integer; --增量表缓存
  tab_all table_type; --增量表全
  tab_up  table_type; --增量表-更新
  tab_ins table_type; --增量表-插入
  v_bz    char(1); --存在标志
  cursor cur_up is
    select * from up_table; --更新表游标
  v_stime   pls_integer; --过程开始时间
  v_time    pls_integer; --每次循环开始时间
  v_num_ins pls_integer := 0; --每次循环插入数量
  v_num_up  pls_integer := 0; --每次循环更新数量
  v_num     number; --数量缓存值
  v_err_msg number; --forall中错误信息
begin
  v_stime := dbms_utility.get_time(); --过程开始
  open cur_up; --打开游标
  loop
    --开始循环游标
    v_time    := dbms_utility.get_time(); --当前循环开始时间
    v_num_ins := 0; --每次循环置零
    v_num_up  := 0; --每次循环置零
    fetch cur_up bulk collect
      into tab_all limit 5000; --批量插入缓存表，每次限定5000条
    exit when tab_all.count = 0; --当缓存表中无数据，退出循环
    --
    for i in 1 .. tab_all.count loop
      --此循环功能：判断是否存在目标表中
      begin
        select '1' into v_bz from big_table2 t where t.rn = tab_all(i).rn; --存在
      exception
        when no_data_found then
          v_bz := '0'; --不存在
      end;
      --
      if v_bz = '1' then
        --存在
        v_num_up := v_num_up + 1; --更新数值+1
        tab_up(v_num_up) := tab_all(i); --复制到更新缓存表
      else
        --不存在
        v_num_ins := v_num_ins + 1; --插入数值+1
        tab_ins(v_num_ins) := tab_all(i); --复制到插入缓存表
      end if;
      --
    end loop;
    --
    --批量SQL，且在批量执行中错误的信息记录在sql%bulk_exceptions缓存表中
    if tab_up.count <> 0 then
      forall i in tab_up.first .. tab_up.last save exceptions
        update big_table2 t
           set t.a = tab_up(i).a,
               t.b = tab_up(i).b,
               t.c = tab_up(i).c,
               t.d = tab_up(i).d
         where t.rn = tab_up(i).rn;
      for i in 1 .. sql%bulk_exceptions.count loop
        v_num     := sql%bulk_exceptions(i).error_index;
        v_err_msg := sqlerrm(-sql%bulk_exceptions(i).error_code);
        insert into ins_up_err
        values
          ('sp_forall', tab_up(v_num).rn, v_err_msg);
      end loop;
      commit;
    end if; --更新批量结束
    --批量SQL，且在批量执行中错误的信息记录在sql%bulk_exceptions缓存表中
    if tab_ins.count <> 0 then
      forall i in tab_ins.first .. tab_ins.last save exceptions
        insert into big_table2
        values
          (tab_ins(i).rn,
           tab_ins(i).a,
           tab_ins(i).b,
           tab_ins(i).c,
           tab_ins(i).d);
      for i in 1 .. sql%bulk_exceptions.count loop
        v_num     := sql%bulk_exceptions(i).error_index;
        v_err_msg := sqlerrm(-sql%bulk_exceptions(i).error_code);
        insert into ins_up_err
        values
          ('sp_forall', tab_ins(v_num).rn, v_err_msg);
      end loop;
      commit;
    end if; --插入批量结束
    insert into ins_up_log --记录日志
    values
      ('sp_forall',
       round((dbms_utility.get_time - v_time) / 100, 2),
       5000,
       v_num_up,
       v_num_ins);
    commit;
    --清空当前循环插入、更新缓存表数据（不清空，下次循环重复执行）
    tab_up.delete;
    tab_ins.delete;
  end loop;
  close cur_up; --关闭游标
  insert into ins_up_log --过程总时间记录
  values
    ('sp_forall',
     round((dbms_utility.get_time - v_stime) / 100, 2),
     '',
     '',
     '');
  commit;
end sp_forall_new;

最后结果：同样对一亿的表插入更新20万条数据,多次执行平均时间

sp_merge 14.48秒
sp_forall 6.63秒
sp_ins_up 44.33秒

从每5000条提交一次的时间可以得出来，forall最稳定，其次merge稍有起伏，手动执行插入更新浮动最大从0.3秒到6.9秒不等。

效率方面：forall优势明显，其次merge也不差，手动插入更新最慢且不稳定

代码方面：merge into与手动插入更新相对简洁，forall就比较复杂

扩展性：手动插入更新可以加上许多业务性功能，forall方式目前oracle也提供了相当多的函数用于数据处理，所以次之，merge into就个人而言，相对笨重一些了。

利物浦希莫斯

关注

9
点赞
踩
33

收藏

觉得还不错? 一键收藏
0
评论
oracle基于3种方法的大数据量插入更新

过程插入更新的3种方法：a、逐条检查插入或更新，同时执行插入或更新b、逐条merge into（逐条是为了记录过程日志与错误信息）c、基于关联数组的检查插入、更新，通过forall批量sql执行以下为模拟步骤：1、创建模拟大表，数据量1亿不分区create table big_table as SELECT ROWNUM RN,'A' A,'B' B,'C' C,'D' D
复制链接

扫一扫