使用PL/SQL 解析Docx 文件

本文不是原创,只是原文路径未及时保存。

我们知道,Word2007 保存的文件格式docx 其实是一个压缩包,可以使用rar或者7-zip 打开。 那么如何使用PL/SQL 来读写docx文件呢?

解决问题的关键还是使用PL/SQL 对压缩文件解压后处理。

 

CREATE OR REPLACE PACKAGE as_zip
IS
TYPE file_list IS TABLE OF CLOB;


FUNCTION get_file_list (p_dir IN VARCHAR2, p_zip_file IN VARCHAR2)
RETURN file_list;


FUNCTION get_file_list (p_zipped_blob IN BLOB)
RETURN file_list;


FUNCTION get_file (
p_dir IN VARCHAR2,
p_zip_file IN VARCHAR2,
p_file_name IN VARCHAR2
)
RETURN BLOB;


FUNCTION get_file (p_zipped_blob IN BLOB, p_file_name IN VARCHAR2)
RETURN BLOB;

END;
/




/* Formatted on 2010/06/23 14:24 (Formatter Plus v4.8.8) */
CREATE OR REPLACE PACKAGE BODY as_zip
IS
--
FUNCTION raw2num (p_value IN RAW)
RETURN NUMBER
IS
BEGIN -- note: FFFFFFFF => -1
RETURN UTL_RAW.cast_to_binary_integer (p_value, UTL_RAW.little_endian);
END;

--
FUNCTION file2blob (p_dir IN VARCHAR2, p_file_name IN VARCHAR2)
RETURN BLOB
IS
file_lob BFILE;
file_blob BLOB;
BEGIN
file_lob := BFILENAME (p_dir, p_file_name);
DBMS_LOB.OPEN (file_lob, DBMS_LOB.file_readonly);
DBMS_LOB.createtemporary (file_blob, TRUE);
DBMS_LOB.loadfromfile (file_blob, file_lob, DBMS_LOB.lobmaxsize);
DBMS_LOB.CLOSE (file_lob);
RETURN file_blob;
EXCEPTION
WHEN OTHERS
THEN
IF DBMS_LOB.ISOPEN (file_lob) = 1
THEN
DBMS_LOB.CLOSE (file_lob);
END IF;

IF DBMS_LOB.istemporary (file_blob) = 1
THEN
DBMS_LOB.freetemporary (file_blob);
END IF;

RAISE;
END;

--
FUNCTION get_file_list (p_dir IN VARCHAR2, p_zip_file IN VARCHAR2)
RETURN file_list
IS
BEGIN
RETURN get_file_list (file2blob (p_dir, p_zip_file));
END;

--
FUNCTION get_file_list (p_zipped_blob IN BLOB)
RETURN file_list
IS
t_ind INTEGER;
t_hd_ind INTEGER;
t_rv file_list;
BEGIN
t_ind := DBMS_LOB.getlength (p_zipped_blob) - 21;

LOOP
EXIT WHEN DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_ind) =
HEXTORAW ('504B0506')
OR t_ind < 1;
t_ind := t_ind - 1;
END LOOP;

--
IF t_ind <= 0
THEN
RETURN NULL;
END IF;

--
t_hd_ind := raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_ind + 16)) + 1;
t_rv := file_list ();
t_rv.EXTEND (raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_ind + 10)));

FOR i IN 1 .. raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_ind + 8))
LOOP
t_rv (i) :=
UTL_RAW.cast_to_varchar2
(DBMS_LOB.SUBSTR (p_zipped_blob,
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_hd_ind + 28
)
),
t_hd_ind + 46
)
);
t_hd_ind :=
t_hd_ind
+ 46
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 28))
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 30))
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 32));
END LOOP;

--
RETURN t_rv;
END;

--
FUNCTION get_file (
p_dir IN VARCHAR2,
p_zip_file IN VARCHAR2,
p_file_name IN VARCHAR2
)
RETURN BLOB
IS
BEGIN
RETURN get_file (file2blob (p_dir, p_zip_file), p_file_name);
END;

--
FUNCTION get_file (p_zipped_blob IN BLOB, p_file_name IN VARCHAR2)
RETURN BLOB
IS
t_tmp BLOB;
t_ind INTEGER;
t_hd_ind INTEGER;
t_fl_ind INTEGER;
BEGIN
t_ind := DBMS_LOB.getlength (p_zipped_blob) - 21;

LOOP
EXIT WHEN DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_ind) =
HEXTORAW ('504B0506')
OR t_ind < 1;
t_ind := t_ind - 1;
END LOOP;

--
IF t_ind <= 0
THEN
RETURN NULL;
END IF;

--
t_hd_ind := raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_ind + 16)) + 1;

FOR i IN 1 .. raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_ind + 8))
LOOP
IF p_file_name =
UTL_RAW.cast_to_varchar2
(DBMS_LOB.SUBSTR (p_zipped_blob,
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_hd_ind + 28
)
),
t_hd_ind + 46
)
)
THEN
IF DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 10) =
HEXTORAW ('0800')
-- deflate
THEN
t_fl_ind :=
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_hd_ind + 42));
t_tmp := HEXTORAW ('1F8B0800000000000003'); -- gzip header
DBMS_LOB.append
(t_tmp,
DBMS_LOB.SUBSTR (p_zipped_blob,
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
4,
t_fl_ind + 19
)
),
t_fl_ind
+ 31
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_fl_ind + 27
)
)
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_fl_ind + 29
)
)
)
);
DBMS_LOB.append (t_tmp,
DBMS_LOB.SUBSTR (p_zipped_blob,
4,
t_fl_ind + 15
)
);
DBMS_LOB.append (t_tmp, HEXTORAW ('00000000'));
RETURN UTL_COMPRESS.lz_uncompress (t_tmp);
END IF;

--
IF DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 10) =
HEXTORAW ('0000')
-- The file is stored (no compression)
THEN
t_fl_ind :=
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 4, t_hd_ind + 42));
RETURN DBMS_LOB.SUBSTR
(p_zipped_blob,
raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
4,
t_fl_ind + 19
)
),
t_fl_ind
+ 31
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_fl_ind + 27
)
)
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob,
2,
t_fl_ind + 29
)
)
);
END IF;
END IF;

t_hd_ind :=
t_hd_ind
+ 46
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 28))
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 30))
+ raw2num (DBMS_LOB.SUBSTR (p_zipped_blob, 2, t_hd_ind + 32));
END LOOP;

--
RETURN NULL;
END;
END;
/

 

执行:

set serveroutput on

declare
fl as_zip.file_list;
begin
fl := as_zip.get_file_list( 'MY_DIR', 'xxx.zip' );
if fl.count() > 0
then
for i in fl.first .. fl.last
loop
dbms_output.put_line( fl( i ) );
end loop;
end if;
end;
/

SQL> set serveroutput on
SQL> declare
2 fl as_zip.file_list;
3 begin
4 fl := as_zip.get_file_list( 'MY_DIR', 'xxx.zip' );
5 if fl.count() > 0
6 then
7 for i in fl.first .. fl.last
8 loop
9 dbms_output.put_line( fl( i ) );
10 end loop;
11 end if;
12 end;
13 /
[成功公关的22条黄金法则].常桦.扫描版.pdf
[成功沟通的22条黄金法则].常桦.扫描版.pdf
[超越自卑].阿德勒.扫描版.pdf

PL/SQL 过程已成功完成。

 

SELECT EXTRACTVALUE (COLUMN_VALUE, '*/text()')
FROM TABLE
(XMLSEQUENCE
(XMLTYPE (as_zip.get_file ('MY_DIR',
'demo.docx',
'word/document.xml'
),
NLS_CHARSET_ID ('UTF8')
).EXTRACT
('w:document/w:body/w:p/w:r/w:t',
'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"'
)
)
)
/

SQL> select extractvalue( column_value, '*/text()' )
2 from table( xmlsequence( xmltype( as_zip.get_file( 'MY_DIR', 'demo.docx', '
word/document.xml' )
3 , nls_charset_id( 'UTF8' )
4 )
5 .extract( 'w:document/w:body/w:p/w:r/w:t',
'xmlns:w="
http://schemas.openxmlformats.org/wordprocessingml/2006/main"' )
6 )
7 )
8 /

EXTRACTVALUE(COLUMN_VALUE,'*/TEXT()')
--------------------------------------------------------------------------------

附件2:

医院信息化基础建设评分标准与方法


Technorati : PL/SQL DOCX ZIP

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值