数据库有如下表结构:
aaa (
obj_id NUMBER, (Primary Key)
obj_name VARCHAR2(80),
obj_size NUMBER NOT NULL
obj_time NUMBER NOT NULL
ms_version NUMBER NOT NULL
)
现在有100万条记录,要求每1000条记录导出为一个csv文件,共计1000个文件,文件名称格式为output0.csv,output1.csv......output999.csv。每个csv文件每行记录包含:$1, $2, $3, $4四部分内容。
$1 = obj_id|obj_name
$2 = obj_size *10+obj_version
$3 = obj_time in format YYYY-MM-DD HH:mi
$4 = obj_name
要求第一个文件在500ms以内产生出来,后面每个文件生成间隔时间不得大于500ms,速度均匀。方案一:使用pl/sql存储过程分页查询,每次查询1000条记录写一个文件,实现代码如下:
- create or replace procedure export_to_csv(P_DIR IN VARCHAR2)
- is
- CSV_OUTPUT UTL_FILE.FILE_TYPE;
- MAX_LINE NUMBER := 1000;
- OUT_FILE_NAME varchar2(20);
- OBJ_SIZE NUMBER;
- MIN_RECORD NUMBER;
- MAX_RECORD NUMBER;
- OBJ_DATE varchar2(100);
- BEGIN_TIME NUMBER;
- END_TIME NUMBER;
- begin
- BEGIN_TIME := dbms_utility.get_time;--记录开始时间,注意dbms_utility.get_time获取的时间戳单位是1/100s
- FOR I IN 0..999 loop
- OUT_FILE_NAME := 'output' || I ||'.csv';--拼接文件名
- MIN_RECORD := i*1000 + 1;--分页查询起始记录
- MAX_RECORD := (i+1)*1000;--分页查询中止记录
- --以写方式打开指定目录中指定文件名文件
- CSV_OUTPUT := UTL_FILE.FOPEN(P_DIR, OUT_FILE_NAME, 'W', MAX_LINE);
- --隐式游标,每次查询1000条记录
- FOR cur IN (SELECT * FROM(SELECT A.*, ROWNUM rn FROM(SELECT * FROM aaa)A WHERE ROWNUM <= MAX_RECORD) WHERE rn >= MIN_RECORD) LOOP
- OBJ_SIZE := cur.OBJ_SIZE*10 + cur.MS_VERSION;
- --将数值类型数据转换为日期字符串
- OBJ_DATE := TO_CHAR(TO_DATE('19700101','yyyymmdd') + cur.OBJ_TIME/86400,'yyyy-MM-dd HH24:mi');
- --写文件
- UTL_FILE.PUT_LINE(CSV_OUTPUT,cur.OBJ_ID || '|' || cur.OBJ_NAME || ',' || OBJ_SIZE || ',' || OBJ_DATE || ',' || cur.OBJ_NAME);
- END LOOP;
- UTL_FILE.FCLOSE(CSV_OUTPUT);
- END LOOP;
- END_TIME := dbms_utility.get_time;
- DBMS_output.put_line('Total time=' || (END_TIME-BEGIN_TIME)*10 || 'ms.');
- END;
- /
- begin
- export_to_csv('MYDIR');
- end;
- /
create or replace procedure export_to_csv(P_DIR IN VARCHAR2)
is
CSV_OUTPUT UTL_FILE.FILE_TYPE;
MAX_LINE NUMBER := 1000;
OUT_FILE_NAME varchar2(20);
OBJ_SIZE NUMBER;
MIN_RECORD NUMBER;
MAX_RECORD NUMBER;
OBJ_DATE varchar2(100);
BEGIN_TIME NUMBER;
END_TIME NUMBER;
begin
BEGIN_TIME := dbms_utility.get_time;--记录开始时间,注意dbms_utility.get_time获取的时间戳单位是1/100s
FOR I IN 0..999 loop
OUT_FILE_NAME := 'output' || I ||'.csv';--拼接文件名
MIN_RECORD := i*1000 + 1;--分页查询起始记录
MAX_RECORD := (i+1)*1000;--分页查询中止记录
--以写方式打开指定目录中指定文件名文件
CSV_OUTPUT := UTL_FILE.FOPEN(P_DIR, OUT_FILE_NAME, 'W', MAX_LINE);
--隐式游标,每次查询1000条记录
FOR cur IN (SELECT * FROM(SELECT A.*, ROWNUM rn FROM(SELECT * FROM aaa)A WHERE ROWNUM <= MAX_RECORD) WHERE rn >= MIN_RECORD) LOOP
OBJ_SIZE := cur.OBJ_SIZE*10 + cur.MS_VERSION;
--将数值类型数据转换为日期字符串
OBJ_DATE := TO_CHAR(TO_DATE('19700101','yyyymmdd') + cur.OBJ_TIME/86400,'yyyy-MM-dd HH24:mi');
--写文件
UTL_FILE.PUT_LINE(CSV_OUTPUT,cur.OBJ_ID || '|' || cur.OBJ_NAME || ',' || OBJ_SIZE || ',' || OBJ_DATE || ',' || cur.OBJ_NAME);
END LOOP;
UTL_FILE.FCLOSE(CSV_OUTPUT);
END LOOP;
END_TIME := dbms_utility.get_time;
DBMS_output.put_line('Total time=' || (END_TIME-BEGIN_TIME)*10 || 'ms.');
END;
/
begin
export_to_csv('MYDIR');
end;
/
经过测试,1000个文件总共花150s,平均每个文件150ms。
注意:执行该存储过程之前,UTL_FILE的目录必须以sysdba的用户创建,然后授权给使用的用户,代码中的MYDIR通过下面方法创建:
首先,以sysdba用户登录。
其次,创建目录,如:CREATE DIRECTORY MYDIR AS 'c:\oraload\';
最后,给用户授权,如:GRANT READ,WRITE ON DIRECTORY MYDIR TO scott;
方案二:部分页,一次性将100万条记录全部查询出来放到游标中,每1000条写一个文件,代码如下:
- create or replace procedure export_to_csv(P_DIR IN VARCHAR2)
- is
- --显示游标,一次性将数据全部查询完
- cursor mycur is select * from aaa;
- --行记录
- myrecord aaa%rowtype;
- CSV_OUTPUT UTL_FILE.FILE_TYPE;
- MAX_LINE NUMBER := 1000;
- OUT_FILE_NAME varchar2(20);
- OBJ_SIZE NUMBER;
- OBJ_DATE varchar2(100);
- BEGIN_TIME NUMBER;
- END_TIME NUMBER;
- COUNT_NUM NUMBER;
- begin
- BEGIN_TIME := dbms_utility.get_time;
- --显式打开游标
- open mycur;
- FOR I IN 0..999 loop
- --拼接文件名
- OUT_FILE_NAME := 'output' || I ||'.csv';
- COUNT_NUM := 0;
- --打开文件
- CSV_OUTPUT := UTL_FILE.FOPEN(P_DIR, OUT_FILE_NAME, 'W', MAX_LINE);
- --每1000条写一个文件
- while COUNT_NUM < 1000 loop
- --逐条叫游标记录放入记录中
- fetch mycur into myrecord;
- OBJ_SIZE := myrecord.OBJ_SIZE*10 + myrecord.MS_VERSION;
- OBJ_DATE := TO_CHAR(TO_DATE('19700101','yyyymmdd') + myrecord.OBJ_TIME/86400,'yyyy-MM-dd HH24:mi');
- UTL_FILE.PUT_LINE(CSV_OUTPUT,myrecord.OBJ_ID || '|' || myrecord.OBJ_NAME || ',' || OBJ_SIZE || ',' || OBJ_DATE || ',' || myrecord.OBJ_NAME);
- COUNT_NUM := COUNT_NUM+1;
- --取游标中下一条记录
- fetch mycur into myrecord;
- END LOOP;
- UTL_FILE.FCLOSE(CSV_OUTPUT);
- END LOOP;
- --关闭游标
- close mycur;
- END_TIME := dbms_utility.get_time;
- DBMS_output.put_line('Total time=' || (END_TIME-BEGIN_TIME)*10 || 'ms.');
- END;
- /
- begin
- export_to_csv('MYDIR');
- end;
- /
create or replace procedure export_to_csv(P_DIR IN VARCHAR2)
is
--显示游标,一次性将数据全部查询完
cursor mycur is select * from aaa;
--行记录
myrecord aaa%rowtype;
CSV_OUTPUT UTL_FILE.FILE_TYPE;
MAX_LINE NUMBER := 1000;
OUT_FILE_NAME varchar2(20);
OBJ_SIZE NUMBER;
OBJ_DATE varchar2(100);
BEGIN_TIME NUMBER;
END_TIME NUMBER;
COUNT_NUM NUMBER;
begin
BEGIN_TIME := dbms_utility.get_time;
--显式打开游标
open mycur;
FOR I IN 0..999 loop
--拼接文件名
OUT_FILE_NAME := 'output' || I ||'.csv';
COUNT_NUM := 0;
--打开文件
CSV_OUTPUT := UTL_FILE.FOPEN(P_DIR, OUT_FILE_NAME, 'W', MAX_LINE);
--每1000条写一个文件
while COUNT_NUM < 1000 loop
--逐条叫游标记录放入记录中
fetch mycur into myrecord;
OBJ_SIZE := myrecord.OBJ_SIZE*10 + myrecord.MS_VERSION;
OBJ_DATE := TO_CHAR(TO_DATE('19700101','yyyymmdd') + myrecord.OBJ_TIME/86400,'yyyy-MM-dd HH24:mi');
UTL_FILE.PUT_LINE(CSV_OUTPUT,myrecord.OBJ_ID || '|' || myrecord.OBJ_NAME || ',' || OBJ_SIZE || ',' || OBJ_DATE || ',' || myrecord.OBJ_NAME);
COUNT_NUM := COUNT_NUM+1;
--取游标中下一条记录
fetch mycur into myrecord;
END LOOP;
UTL_FILE.FCLOSE(CSV_OUTPUT);
END LOOP;
--关闭游标
close mycur;
END_TIME := dbms_utility.get_time;
DBMS_output.put_line('Total time=' || (END_TIME-BEGIN_TIME)*10 || 'ms.');
END;
/
begin
export_to_csv('MYDIR');
end;
/
经测试发现,100万条记录总共耗时50s,平均每个文件50ms,速度大大提高。
创建utl_file_dir步骤如下:
1. 以sys用户登录数据库
2. create or replace directory MY_DIR as 'c:/abc';
3. grant read,write on directory TMP to public(数据库用户);
不需要重启数据库
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/26690043/viewspace-735901/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/26690043/viewspace-735901/