DBMS_PARALLEL_EXECUTE 11GR2新特性，并行订正大数据

最新推荐文章于 2022-07-20 22:07:51 发布

cotchte0421

最新推荐文章于 2022-07-20 22:07:51 发布

阅读量179

点赞数

http://blog.itpub.net/post/7167/495835

https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=BULLETIN&id=1066555.1

http://www.cnblogs.com/tracy/archive/2011/06/09/2076324.html

Applies to:

PL/SQL - Version: 11.2.0.1.0 to 11.2.0.1.0
Information in this document applies to any platform.

Purpose

The purpose of the article is to introduce the 11.2 new feature on using DBMS_PARALLEL_EXECUTE to update large tables using chunk mechanism.

Scope and Application

This is intended for PL/SQL programmers

11.2 New Feature : Using DBMS_PARALLEL_EXECUTE to Update Large Tables in Parallel

Overview

The DBMS_PARALLEL_EXECUTE package enables you to incrementally update the data in a large table in parallel, in two high-level steps:

Group sets of rows in the table into smaller chunks.
Apply the desired UPDATE statement to the chunks in parallel, committing each time you have finished processing a chunk.

This technique is recommended whenever you are updating a lot of data. Its advantages are:

You lock only one set of rows at a time, for a relatively short time, instead of locking the entire table.
You do not lose work that has been done if something fails before the entire operation finishes.
You reduce rollback space consumption.
You improve performance.

Different Ways to Spilt Workload

CREATE_CHUNKS_BY_NUMBER_COL : Chunks the table associated with the given task by the specified column.
CREATE_CHUNKS_BY_ROWID : Chunks the table associated with the given task by ROWID
CREATE_CHUNKS_BY_SQL : Chunks the table associated with the given task by means of a user-provided SELECT statement

Pre-Requisites before using DBMS_PARALLEL_EXECUTE.

The user controlling the process needs the CREATE JOB privilege
Execute privilege on DBMS_SQL package because CHUNK_BY_SQL, RUN_TASK, and RESUME_TASK subprograms require a query, and are executed using DBMS_SQL.

Sample Program

Creating user , granting privileges and generating sample data.

Conn /as sysdba
/* Passwords are case-sentive from 11.1 */
GRANT Create Session, Resource to Usr identified by Usr;
GRANT CREATE JOB TO Usr;

Conn Usr/Usr
/*Create table with sample data*/
DROP TABLE test_tab;
CREATE TABLE test_tab (
id NUMBER,
description VARCHAR2(50),
num_col NUMBER,
CONSTRAINT test_tab_pk PRIMARY KEY (id)
);

INSERT /*+ APPEND */ INTO test_tab
SELECT level,
'Description for ' || level,
CASE
WHEN MOD(level, 5) = 0 THEN 10
WHEN MOD(level, 3) = 0 THEN 20
ELSE 30
END
FROM dual
CONNECT BY level <= 500000;
COMMIT;

SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
10 100000
20 133333
30 266667

1. Using CREATE_CHUNKS_BY_ROWID

BEGIN
DBMS_PARALLEL_EXECUTE.DROP_TASK ('test_task');
END;
/

DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);

-- Chunk the table by the ROWID
DBMS_PARALLEL_EXECUTE.create_chunks_by_rowid(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
by_row => TRUE,
chunk_size => 10000);
-- DML to be execute in parallel
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE rowid BETWEEN :start_id AND :end_id';
-- Run the task
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/

SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

2. Using CREATE_CHUNKS_BY_NUMBER_COL

CREATE OR REPLACE PROCEDURE process_update (p_start_id IN NUMBER, p_end_id IN NUMBER) AS
BEGIN
UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE id BETWEEN p_start_id AND p_end_id;
END;
/

DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);

-- Chunk the table by the ID
DBMS_PARALLEL_EXECUTE.create_chunks_by_number_col(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
table_column => 'ID',
chunk_size => 10000);
-- Procedure to be execute in parallel
l_sql_stmt := 'BEGIN process_update(:start_id, :end_id); END;';

DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/

SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

3. Using CREATE_CHUNKS_BY_SQL.

DECLARE
l_chunk_sql VARCHAR2(1000);
l_sql_stmt VARCHAR2(1000);
l_try NUMBER;
l_status NUMBER;
BEGIN

-- Create the TASK
DBMS_PARALLEL_EXECUTE.CREATE_TASK ('test_task');

-- Chunk the table by NUM_COL
l_chunk_sql := 'SELECT DISTINCT num_col, num_col FROM test_tab';
DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_SQL('test_task', l_chunk_sql, false);

-- Execute the DML in parallel
-- the WHERE clause contain a condition on num_col, which is the chunk
-- column. In this case, grouping rows is by num_col.
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE num_col BETWEEN :start_id AND :end_id';
DBMS_PARALLEL_EXECUTE.RUN_TASK('test_task', l_sql_stmt, DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
L_try := 0;
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
WHILE(l_try < 2 and L_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
L_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.RESUME_TASK('test_task');
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
END LOOP;

-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.DROP_TASK('test_task');

end;
/

SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

References :

Oracle Database PL/SQL Packages and Types Reference
11g Release 2 (11.2)
Part Number E10577-05

来自 “ ITPUB博客 ” ，链接：http://blog.itpub.net/22034023/viewspace-714596/，如需转载，请注明出处，否则将追究法律责任。

转载于:http://blog.itpub.net/22034023/viewspace-714596/