最近公司开始用stream来做BI的数据同步,开始时还算顺利,做的downstream数据库表级复制,结果过了半个月要往里面加新的表,结果重启stream后destination库的apply进程开始报错,虽然DISABLE_ON_ERROR已经设置为N,但是错误越来越多,没有办法只好开始研究,从开始到最终解决问题过程比较曲折,还经历过重新impdp表,现在记录一下解决过程备忘。
首先贴上几个过程,当时找了很久,metalink上找了半天,这个过程是用于输出apply的错误信息的,否则在dba_apply_error里面什么都看不到。
1.print_any
- CREATE OR REPLACE PROCEDURE print_any (DATA IN ANYDATA)
- IS
- tn VARCHAR2 (61);
- str VARCHAR2 (4000);
- CHR VARCHAR2 (1000);
- num NUMBER;
- dat DATE;
- rw RAW (4000);
- res NUMBER;
- BEGIN
- IF DATA IS NULL
- THEN
- DBMS_OUTPUT.put_line ('NULL value');
- RETURN;
- END IF;
- tn := DATA.gettypename ();
- IF tn = 'SYS.VARCHAR2'
- THEN
- res := DATA.getvarchar2 (str);
- DBMS_OUTPUT.put_line (SUBSTR (str, 0, 253));
- ELSIF tn = 'SYS.CHAR'
- THEN
- res := DATA.getchar (CHR);
- DBMS_OUTPUT.put_line (SUBSTR (CHR, 0, 253));
- ELSIF tn = 'SYS.VARCHAR'
- THEN
- res := DATA.getvarchar (CHR);
- DBMS_OUTPUT.put_line (CHR);
- ELSIF tn = 'SYS.NUMBER'
- THEN
- res := DATA.getnumber (num);
- DBMS_OUTPUT.put_line (num);
- ELSIF tn = 'SYS.DATE'
- THEN
- res := DATA.getdate (dat);
- DBMS_OUTPUT.put_line (dat);
- ELSIF tn = 'SYS.RAW'
- THEN
- -- res := data.GETRAW(rw);
- -- DBMS_OUTPUT.PUT_LINE(SUBSTR(DBMS_LOB.SUBSTR(rw),0,253));
- DBMS_OUTPUT.put_line ('BLOB Value');
- ELSIF tn = 'SYS.BLOB'
- THEN
- DBMS_OUTPUT.put_line ('BLOB Found');
- ELSE
- DBMS_OUTPUT.put_line ('typename is ' || tn);
- END IF;
- END print_any;
2.print_errors
- CREATE OR REPLACE PROCEDURE print_errors
- IS
- CURSOR c
- IS
- SELECT local_transaction_id, source_database, message_number,
- message_count, error_number, error_message
- FROM dba_apply_error
- ORDER BY source_database, source_commit_scn;
- i NUMBER;
- txnid VARCHAR2 (30);
- SOURCE VARCHAR2 (128);
- msgno NUMBER;
- msgcnt NUMBER;
- errnum NUMBER := 0;
- errno NUMBER;
- errmsg VARCHAR2 (255);
- lcr ANYDATA;
- r NUMBER;
- BEGIN
- FOR r IN c
- LOOP
- errnum := errnum + 1;
- msgcnt := r.message_count;
- txnid := r.local_transaction_id;
- SOURCE := r.source_database;
- msgno := r.message_number;
- errno := r.error_number;
- errmsg := r.error_message;
- DBMS_OUTPUT.put_line
- ('*************************************************');
- DBMS_OUTPUT.put_line ('----- ERROR #' || errnum);
- DBMS_OUTPUT.put_line ('----- Local Transaction ID: ' || txnid);
- DBMS_OUTPUT.put_line ('----- Source Database: ' || SOURCE);
- DBMS_OUTPUT.put_line ('----Error in Message: ' || msgno);
- DBMS_OUTPUT.put_line ('----Error Number: ' || errno);
- DBMS_OUTPUT.put_line ('----Message Text: ' || errmsg);
- FOR i IN 1 .. msgcnt
- LOOP
- DBMS_OUTPUT.put_line ('--message: ' || i);
- lcr := DBMS_APPLY_ADM.get_error_message (i, txnid);
- print_lcr (lcr);
- END LOOP;
- END LOOP;
- END print_errors;
3.print_lcr
- CREATE OR REPLACE PROCEDURE print_lcr (lcr IN ANYDATA)
- IS
- typenm VARCHAR2 (61);
- ddllcr SYS.lcr$_ddl_record;
- proclcr SYS.lcr$_procedure_record;
- rowlcr SYS.lcr$_row_record;
- res NUMBER;
- newlist SYS.lcr$_row_list;
- oldlist SYS.lcr$_row_list;
- ddl_text CLOB;
- ext_attr ANYDATA;
- BEGIN
- typenm := lcr.gettypename ();
- DBMS_OUTPUT.put_line ('type name: ' || typenm);
- IF (typenm = 'SYS.LCR$_DDL_RECORD')
- THEN
- res := lcr.getobject (ddllcr);
- DBMS_OUTPUT.put_line ( 'source database: '
- || ddllcr.get_source_database_name
- );
- DBMS_OUTPUT.put_line ('owner: ' || ddllcr.get_object_owner);
- DBMS_OUTPUT.put_line ('object: ' || ddllcr.get_object_name);
- DBMS_OUTPUT.put_line ('is tag null: ' || ddllcr.is_null_tag);
- DBMS_LOB.createtemporary (ddl_text, TRUE);
- ddllcr.get_ddl_text (ddl_text);
- DBMS_OUTPUT.put_line ('ddl: ' || ddl_text);
- -- Print extra attributes in DDL LCR
- ext_attr := ddllcr.get_extra_attribute ('serial#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('serial#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := ddllcr.get_extra_attribute ('session#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('session#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := ddllcr.get_extra_attribute ('thread#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('thread#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := ddllcr.get_extra_attribute ('tx_name');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ( 'transaction name: '
- || ext_attr.accessvarchar2 ()
- );
- END IF;
- ext_attr := ddllcr.get_extra_attribute ('username');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('username: ' || ext_attr.accessvarchar2 ());
- END IF;
- DBMS_LOB.freetemporary (ddl_text);
- ELSIF (typenm = 'SYS.LCR$_ROW_RECORD')
- THEN
- res := lcr.getobject (rowlcr);
- DBMS_OUTPUT.put_line ( 'source database: '
- || rowlcr.get_source_database_name
- );
- DBMS_OUTPUT.put_line ('owner: ' || rowlcr.get_object_owner);
- DBMS_OUTPUT.put_line ('object: ' || rowlcr.get_object_name);
- DBMS_OUTPUT.put_line ('is tag null: ' || rowlcr.is_null_tag);
- DBMS_OUTPUT.put_line ('command_type: ' || rowlcr.get_command_type);
- oldlist := rowlcr.get_values ('old');
- FOR i IN 1 .. oldlist.COUNT
- LOOP
- IF oldlist (i) IS NOT NULL
- THEN
- DBMS_OUTPUT.put_line ('old(' || i || '): '
- || oldlist (i).column_name
- );
- print_any (oldlist (i).DATA);
- END IF;
- END LOOP;
- newlist := rowlcr.get_values ('new', 'n');
- FOR i IN 1 .. newlist.COUNT
- LOOP
- IF newlist (i) IS NOT NULL
- THEN
- DBMS_OUTPUT.put_line ('new(' || i || '): '
- || newlist (i).column_name
- );
- print_any (newlist (i).DATA);
- END IF;
- END LOOP;
- -- Print extra attributes in row LCR
- ext_attr := rowlcr.get_extra_attribute ('row_id');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('row_id: ' || ext_attr.accessurowid ());
- END IF;
- ext_attr := rowlcr.get_extra_attribute ('serial#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('serial#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := rowlcr.get_extra_attribute ('session#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('session#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := rowlcr.get_extra_attribute ('thread#');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('thread#: ' || ext_attr.accessnumber ());
- END IF;
- ext_attr := rowlcr.get_extra_attribute ('tx_name');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ( 'transaction name: '
- || ext_attr.accessvarchar2 ()
- );
- END IF;
- ext_attr := rowlcr.get_extra_attribute ('username');
- IF (ext_attr IS NOT NULL)
- THEN
- DBMS_OUTPUT.put_line ('username: ' || ext_attr.accessvarchar2 ());
- END IF;
- ELSE
- DBMS_OUTPUT.put_line ('Non-LCR Message with type ' || typenm);
- END IF;
- END print_lcr;
4.print_transaction
- CREATE OR REPLACE PROCEDURE print_transaction (ltxnid IN VARCHAR2)
- IS
- i NUMBER;
- txnid VARCHAR2 (255);
- SOURCE VARCHAR2 (128);
- msgno NUMBER;
- msgcnt NUMBER;
- errno NUMBER;
- errmsg VARCHAR2 (255);
- lcr ANYDATA;
- BEGIN
- SELECT local_transaction_id, source_database, message_number,
- message_count, error_number, error_message
- INTO txnid, SOURCE, msgno,
- msgcnt, errno, errmsg
- FROM dba_apply_error
- WHERE local_transaction_id = ltxnid;
- DBMS_OUTPUT.put_line ('----- Local Transaction ID: ' || txnid);
- DBMS_OUTPUT.put_line ('----- Source Database: ' || SOURCE);
- DBMS_OUTPUT.put_line ('----Error in Message: ' || msgno);
- DBMS_OUTPUT.put_line ('----Error Number: ' || errno);
- DBMS_OUTPUT.put_line ('----Message Text: ' || errmsg);
- FOR i IN 1 .. msgcnt
- LOOP
- DBMS_OUTPUT.put_line ('--message: ' || i);
- lcr := DBMS_APPLY_ADM.get_error_message (i, txnid); -- gets the LCR
- print_lcr (lcr);
- END LOOP;
- END print_transaction;
这4个过程的具体用法就不记录了,简单易懂的。
通过这些输出后发现一个translation里面往往有很多条修改(不只是一张表),我这里最多的一个里面居然有11W条修改,当时真是崩溃。。
错误越来越多的原因就是一个translation里面那么多修改,而只要有一个出现ora-01403那整个translation都无法执行下去,这样就会造成连锁反应,后面的数据不停的报错,最后整个stream不可用只能重新导入。
发现这个问题的产生是由于很多update导致的,比如将table1中的id=2,name=2的记录修改为name=3,stream会去查找id=2 and name=2的记录,如果找不到就报ORA-01403,这样几乎不可能修复数据(一个表上百个字段不可能一个一个去对比),最后网上搜索了一下,发现oracle的stream提供了一个update的冲突解决过程DBMS_APPLY_ADM.SET_UPDATE_CONFLICT_HANDLER,对于这个过程中resolution_column的意义我到目前还不是很理解,这里参考了别人的blog,等研究清楚在将记录补齐。设置完这个后update的冲突解决了,附上blog的内容和我用来批量生成脚本的sql
sql如下:
- select 'DECLARE
- cols DBMS_UTILITY.NAME_ARRAY;
- BEGIN
- cols(1) := ''' || column_name || ''';
- DBMS_APPLY_ADM.SET_UPDATE_CONFLICT_HANDLER(object_name => ''schema_name.' ||
- table_name ||
- ''', method_name => ''OVERWRITE'',resolution_column => ''' ||
- column_name || ''',column_list => cols);
- END;
- /'
- from user_tab_columns
- where table_name in ();
blog的内容如下:
- SQL> declare
- 2 cols dbms_utility.name_array;
- 3 begin
- 4 cols(1):='ID';
- 5 cols(2):='VNAME';
- 6 cols(3):='QTY';
- 7 cols(4):='VADDR';
- 8 cols(5):='VSEX';
- 9 dbms_apply_adm.set_update_conflict_handler(
- 10 object_name => 'HZ.T1',
- 11 method_name => 'overwrite',
- 12 resolution_column => 'VNAME',
- 13 column_list => cols);
- 14 end;
- 15 /
- PL/SQL procedure successfully completed
SQL> SELECT * FROM HZ.T1;
ID VNAME QTY VADDR VSEX
---------- -------------------- ---------- ---------- ----------
1 guangzhou 100 111 333
2 111 200 111 333
ID VNAME QTY VADDR VSEX
---------- -------------------- ---------- ---------- ----------
1 guangzhou 100 111 333
2 333 200 111 333
STATUS
--------
ABORTED
APPLY_NAME LOCAL_TRANSACTION_ID ERROR_MESSAGE
------------ ---------------------- -------------------------------------------------------------------------------- -
APPLY_STANDY 5.13.848 ORA-26786: A row with key ("ID") = (2) exists but has conflicting column(s) "VNA
SQL> EXEC DBMS_APPLY_ADM.execute_error('5.13.848');
begin DBMS_APPLY_ADM.execute_error('5.13.848'); end;
ORA-23460: 列 "QTY" 的值丢失, 在解析方法 "OVERWRITE" 中用于 "HZ"."T1"."REP_UPDATE"
ORA-26786: 键为 ("ID") = (2) 的行存在, 但具有冲突列 "VNAME" (在表 HZ.T1 中)
ORA-01403: 未找到任何数据
ORA-06512: 在 "SYS.DBMS_APPLY_ERROR", line 151
ORA-06512: 在 "SYS.DBMS_APPLY_ERROR", line 265
ORA-06512: 在 "SYS.DBMS_APPLY_ADM", line 467
ORA-06512: 在 line 2
4.删除现有的冲突定义
- SQL> declare
- 2 cols dbms_utility.name_array;
- 3 begin
- 4 cols(1):='ID';
- 5 cols(2):='VNAME';
- 6 cols(3):='QTY';
- 7 cols(4):='VADDR';
- 8 cols(5):='VSEX';
- 9 dbms_apply_adm.set_update_conflict_handler(
- 10 object_name => 'HZ.T1',
- 11 method_name => null,
- 12 resolution_column => 'VNAME',
- 13 column_list => cols);
- 14 end;
- 15 /
- PL/SQL procedure successfully completed
5.重新定义新的冲突方式,字段值列出vname字段看效果
- SQL>
- SQL> declare
- 2 cols dbms_utility.name_array;
- 3 begin
- 4 cols(1):='VNAME';
- 5 dbms_apply_adm.set_update_conflict_handler(
- 6 object_name => 'HZ.T1',
- 7 method_name => 'overwrite',
- 8 resolution_column => 'VNAME',
- 9 column_list => cols);
- 10 end;
- 11 /
- PL/SQL procedure successfully completed
6.再次执行错误的事务号,执行成功了。
SQL> EXEC DBMS_APPLY_ADM.execute_error('5.13.848');
PL/SQL procedure successfully completed
7.查看目标库的记录,已经更新了
SQL> select * from hz.t1;
ID VNAME QTY VADDR VSEX
---------- -------------------- ---------- ---------- ----------
1 guangzhou 100 111 333
2 333 200 111 333
1.目标库更新:
SQL> update hz.t1 set vaddr='guangzhou',qty=1 where id=1;
1 row updated
SQL> commit;
Commit complete
SQL> SELECT * FROM hz.T1;
ID VNAME QTY VADDR VSEX
---------- -------------------- ---------- ---------- ----------
1 666 1 guangzhou 333
2 333 200 111 333
SQL> update hz.t1 set vaddr='aaa' where id=1;
1 row updated
SQL> commit;
Commit complete
3.应用出错.
SQL> select status from dba_apply;
STATUS
--------
ABORTED
------------ --------------------------------------------------------------------------------
26786 ORA-26786: A row with key ("ID") = (1) exists but has conflicting column(s) "VAD
SQL> exec dbms_apply_adm.execute_error('2.12.797');
begin dbms_apply_adm.execute_error('2.12.797'); end;
ORA-26786: 键为 ("ID") = (1) 的行存在, 但具有冲突列 "VADDR" (在表 HZ.T1 中)
ORA-01403: 未找到任何数据
ORA-06512: 在 "SYS.DBMS_APPLY_ERROR", line 151
ORA-06512: 在 "SYS.DBMS_APPLY_ERROR", line 265
ORA-06512: 在 "SYS.DBMS_APPLY_ADM", line 467
ORA-06512: 在 line 2
4.只好把所有更新的字段一个个写入冲突方式中,
- SQL> declare
- 2 cols dbms_utility.name_array;
- 3 begin
- 4 cols(1):='QTY'; --定义QTY的冲突方式
- 5 dbms_apply_adm.set_update_conflict_handler(
- 6 object_name => 'HZ.T1',
- 7 method_name => 'overwrite',
- 8 resolution_column => 'QTY',
- 9 column_list => cols);
- 10 end;
- 11 /
- PL/SQL procedure successfully completed
- SQL> declare
- 2 cols dbms_utility.name_array;
- 3 begin
- 4 cols(1):='VADDR'; --定义VADDR的冲突方式
- 5 dbms_apply_adm.set_update_conflict_handler(
- 6 object_name => 'HZ.T1',
- 7 method_name => 'overwrite',
- 8 resolution_column => 'VADDR',
- 9 column_list => cols);
- 10 end;
- 11 /
- PL/SQL procedure successfully completed
SQL> exec dbms_apply_adm.execute_error('2.12.797');
PL/SQL procedure successfully completed
SQL> SELECT * FROM hz.T1;
ID VNAME QTY VADDR VSEX
---------- -------------------- ---------- ---------- ----------
1 666 1 aaa 333
2 333 200 111 333
7. dba_apply_conflict_columns 表的记录。
SQL> select OBJECT_OWNER,OBJECT_NAME,METHOD_NAME,RESOLUTION_COLUMN,COLUMN_NAME from dba_apply_conflict_columns WHERE OBJECT_NAME='T1';
OBJECT_OWNER OBJECT_NAME METHOD_NAME RESOLUTION_COLUMN COLUMN_NAME
------------- ------------- ------------- ----------------------- ------------------------------
HZ T1 OVERWRITE QTY QTY
HZ T1 OVERWRITE VADDR VADDR
HZ T1 OVERWRITE VNAME VNAME