create or replace package xml_type_util is
/*
用于刷新rss_dataguru表中的数据(更新rss源)
exec xml_type_util.refresh_rss_table ;
*/
procedure refresh_rss_table ;
end ;
/
create or replace package body xml_type_util is
--rss表rss_dataguru
type t_rss_tb_list is table of rss_itpub%rowtype ;
--private function
function convert_to_DOMDocument (xmlclob clob) return xmldom.DOMDocument
is
parser xmlparser.PARSER;
result_document xmldom.DOMDocument;
begin
-- dbms_output.put_line(dbms_lob.Getlength(xmlclob)) ;
parser := xmlparser.newParser;
xmlparser.parseClob(parser, xmlclob);
result_document := xmlparser.getDocument(parser);
--set free
xmlparser.freeParser(parser);
return result_document ;
end ;
--private function 2
function parse_xml_to_record (doc xmldom.DOMDocument) return t_rss_tb_list is
lenUnit integer;
lenItem integer;
unitNodes xmldom.DOMNodeList;
itemNodes xmldom.DOMNodeList;
tempNode_unit xmldom.DOMNode;
tempNode xmldom.DOMNode;
name varchar2(1000);
value varchar2(1000);
rss_tb_list t_rss_tb_list := t_rss_tb_list() ;
begin
unitNodes := xmldom.getElementsByTagName(doc, 'item');
lenUnit := xmldom.getLength(unitNodes);
rss_tb_list.extend(lenUnit) ;
dbms_output.put_line(lenUnit) ;
FOR i in 0 .. lenUnit - 1 LOOP
tempNode_unit := xmldom.item( unitNodes, i );
itemNodes:=xmldom.getChildNodes(tempNode_unit);
lenItem := xmldom.getLength( itemNodes );
rss_tb_list(i+1).id := seq_rss_itpub.nextval ;
--遍历子元素,暂时没有找到更好的方法,使用case when 还可以获取其他异常情况,放到other字段中
FOR j in 0..lenItem-1 LOOP
tempNode := xmldom.item( itemNodes, j );
name := xmldom.getNodeName(tempNode);
value := xmldom.getNodeValue(xmldom.getFirstChild(tempNode));
case name
when 'title' then
rss_tb_list(i+1).title := value ;
when 'link' then
rss_tb_list(i+1).link := value ;
when 'description' then
rss_tb_list(i+1).description := value ;
when 'category' then
rss_tb_list(i+1).category := value ;
when 'author' then
rss_tb_list(i+1).author := value ;
when 'pubDate' then
--value : Mon, 24 Dec 2012 10:58:00 +0000 regexp_replace 截取字符串 因为默认显示的是0时区时间,北京为东八区,需要加8个小时
rss_tb_list(i+1).pubDate := TO_DATE(regexp_replace(value,'.*, |\+.+',''),'dd MON YYYY hh24:mi:ss')+8/24 ;
--附件信息
when 'enclosure' then
value := xmldom.getAttribute(xmldom.makeElement(tempNode),'url') ;
rss_tb_list(i+1).enclosure:=value ;
--其他信息,未处理的元素信息
else
rss_tb_list(i+1).other := rss_tb_list(i+1).other || ','||name||'@'||value ;
end case ;
--DBMS_output.PUT_LINE(i||j||name||value);
end loop;
end loop;
--free
xmldom.freeDocument(doc);
return rss_tb_list ;
end ;
--主函数
procedure refresh_rss_table is
--Oracle 开发 rss 地址
v_blog_url varchar2(4000) := 'http://www.itpub.net/forum.php?mod=rss'
||chr(38)||'fid=3'||chr(38)||
'auth=004clZR9P033xjFZsM%2FJe2jTbI5m3ObYbosefQZAy11tziIeN1967GnsmVY1c34d' ;
clobs clob ;
doc xmldom.DOMDocument ;
rss_s t_rss_tb_list ;
begin
-- DBMS_LOB.CREATETEMPORARY(clobs, true);
clobs := httpuritype(v_blog_url).getClob; --获取目标url的xml内容
If Dbms_Lob.Getlength(clobs) Is Null or Dbms_Lob.Getlength(clobs) = 0 Then
--判断url是否存在
raise_application_error(-20999, 'No xml.');
End If;
doc := convert_to_DOMDocument(clobs) ;
rss_s := parse_xml_to_record(doc) ;
--如果存在相同的title,不做操作,如果表中不存在,则执行插入操作
forall i in 1 .. rss_s.last
execute immediate '
merge into rss_itpub r
using (select :1 as title ,
:2 as link ,
:3 as description ,
:4 as category ,
:5 as author ,
:6 as pubDate,
:7 as enclosure ,
:8 as other ,
:9 as id from dual )l
on (r.title=l.title)
when not matched then
insert (title,link,description,category,author,pubDate,enclosure,other,id)
values (l.title,l.link,l.description,l.category,l.author,l.pubDate,l.enclosure,l.other,l.id)'
using rss_s(i).title,rss_s(i).link,rss_s(i).description,rss_s(i).category,rss_s(i).author,
rss_s(i).pubDate,rss_s(i).enclosure,rss_s(i).other,rss_s(i).id ;
commit ;
end ;
end;
/