Applies to:
PL/SQL - Version: 11.2.0.1.0 to 11.2.0.1.0Information in this document applies to any platform.
Purpose
The purpose of the article is to introduce the 11.2 new feature on using DBMS_PARALLEL_EXECUTE to update large tables using chunk mechanism.Scope and Application
This is intended for PL/SQL programmers11.2 New Feature : Using DBMS_PARALLEL_EXECUTE to Update Large Tables in Parallel
Overview
The DBMS_PARALLEL_EXECUTE package enables you to incrementally update the data in a large table in parallel, in two high-level steps:- Group sets of rows in the table into smaller chunks.
- Apply the desired UPDATE statement to the chunks in parallel, committing each time you have finished processing a chunk.
- You lock only one set of rows at a time, for a relatively short time, instead of locking the entire table.
- You do not lose work that has been done if something fails before the entire operation finishes.
- You reduce rollback space consumption.
- You improve performance.
Different Ways to Spilt Workload
- CREATE_CHUNKS_BY_NUMBER_COL : Chunks the table associated with the given task by the specified column.
- CREATE_CHUNKS_BY_ROWID : Chunks the table associated with the given task by ROWID
- CREATE_CHUNKS_BY_SQL : Chunks the table associated with the given task by means of a user-provided SELECT statement
Pre-Requisites before using DBMS_PARALLEL_EXECUTE.
- The user controlling the process needs the CREATE JOB privilege
- Execute privilege on DBMS_SQL package because CHUNK_BY_SQL, RUN_TASK, and RESUME_TASK subprograms require a query, and are executed using DBMS_SQL.
Sample Program
Creating user , granting privileges and generating sample data.
Conn /as sysdba
/* Passwords are case-sentive from 11.1 */
GRANT Create Session, Resource to Usr identified by Usr;
GRANT CREATE JOB TO Usr;
Conn Usr/Usr
/*Create table with sample data*/
DROP TABLE test_tab;
CREATE TABLE test_tab (
id NUMBER,
description VARCHAR2(50),
num_col NUMBER,
CONSTRAINT test_tab_pk PRIMARY KEY (id)
);
INSERT /*+ APPEND */ INTO test_tab
SELECT level,
'Description for ' || level,
CASE
WHEN MOD(level, 5) = 0 THEN 10
WHEN MOD(level, 3) = 0 THEN 20
ELSE 30
END
FROM dual
CONNECT BY level <= 500000;
COMMIT;
SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
10 100000
20 133333
30 266667
Conn /as sysdba
/* Passwords are case-sentive from 11.1 */
GRANT Create Session, Resource to Usr identified by Usr;
GRANT CREATE JOB TO Usr;
Conn Usr/Usr
/*Create table with sample data*/
DROP TABLE test_tab;
CREATE TABLE test_tab (
id NUMBER,
description VARCHAR2(50),
num_col NUMBER,
CONSTRAINT test_tab_pk PRIMARY KEY (id)
);
INSERT /*+ APPEND */ INTO test_tab
SELECT level,
'Description for ' || level,
CASE
WHEN MOD(level, 5) = 0 THEN 10
WHEN MOD(level, 3) = 0 THEN 20
ELSE 30
END
FROM dual
CONNECT BY level <= 500000;
COMMIT;
SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
10 100000
20 133333
30 266667
1. Using CREATE_CHUNKS_BY_ROWID
BEGIN
DBMS_PARALLEL_EXECUTE.DROP_TASK ('test_task');
END;
/
DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);
-- Chunk the table by the ROWID
DBMS_PARALLEL_EXECUTE.create_chunks_by_rowid(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
by_row => TRUE,
chunk_size => 10000);
-- DML to be execute in parallel
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE rowid BETWEEN :start_id AND :end_id';
-- Run the task
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
DBMS_PARALLEL_EXECUTE.DROP_TASK ('test_task');
END;
/
DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);
-- Chunk the table by the ROWID
DBMS_PARALLEL_EXECUTE.create_chunks_by_rowid(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
by_row => TRUE,
chunk_size => 10000);
-- DML to be execute in parallel
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE rowid BETWEEN :start_id AND :end_id';
-- Run the task
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
2. Using CREATE_CHUNKS_BY_NUMBER_COL
CREATE OR REPLACE PROCEDURE process_update (p_start_id IN NUMBER, p_end_id IN NUMBER) AS
BEGIN
UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE id BETWEEN p_start_id AND p_end_id;
END;
/
DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);
-- Chunk the table by the ID
DBMS_PARALLEL_EXECUTE.create_chunks_by_number_col(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
table_column => 'ID',
chunk_size => 10000);
-- Procedure to be execute in parallel
l_sql_stmt := 'BEGIN process_update(:start_id, :end_id); END;';
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
BEGIN
UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE id BETWEEN p_start_id AND p_end_id;
END;
/
DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);
-- Chunk the table by the ID
DBMS_PARALLEL_EXECUTE.create_chunks_by_number_col(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
table_column => 'ID',
chunk_size => 10000);
-- Procedure to be execute in parallel
l_sql_stmt := 'BEGIN process_update(:start_id, :end_id); END;';
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
3. Using CREATE_CHUNKS_BY_SQL.
DECLARE
l_chunk_sql VARCHAR2(1000);
l_sql_stmt VARCHAR2(1000);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.CREATE_TASK ('test_task');
-- Chunk the table by NUM_COL
l_chunk_sql := 'SELECT DISTINCT num_col, num_col FROM test_tab';
DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_SQL('test_task', l_chunk_sql, false);
-- Execute the DML in parallel
-- the WHERE clause contain a condition on num_col, which is the chunk
-- column. In this case, grouping rows is by num_col.
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE num_col BETWEEN :start_id AND :end_id';
DBMS_PARALLEL_EXECUTE.RUN_TASK('test_task', l_sql_stmt, DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
L_try := 0;
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
WHILE(l_try < 2 and L_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
L_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.RESUME_TASK('test_task');
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.DROP_TASK('test_task');
end;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
l_chunk_sql VARCHAR2(1000);
l_sql_stmt VARCHAR2(1000);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.CREATE_TASK ('test_task');
-- Chunk the table by NUM_COL
l_chunk_sql := 'SELECT DISTINCT num_col, num_col FROM test_tab';
DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_SQL('test_task', l_chunk_sql, false);
-- Execute the DML in parallel
-- the WHERE clause contain a condition on num_col, which is the chunk
-- column. In this case, grouping rows is by num_col.
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE num_col BETWEEN :start_id AND :end_id';
DBMS_PARALLEL_EXECUTE.RUN_TASK('test_task', l_sql_stmt, DBMS_SQL.NATIVE,
parallel_level => 10);
-- If there is error, RESUME it for at most 2 times.
L_try := 0;
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
WHILE(l_try < 2 and L_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
L_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.RESUME_TASK('test_task');
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.DROP_TASK('test_task');
end;
/
SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;
NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667
References :
Oracle Database PL/SQL Packages and Types Reference
11g Release 2 (11.2)
Part Number E10577-05
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/22034023/viewspace-714596/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/22034023/viewspace-714596/