埋点分析,果断搞起,早在oracle的9i推出xmltype类型,对xml在数据库存储提供了有力的支持,但这里先不用xmltype使用clob字段,xmltype11g 还是默认就是clob方式,12c中的
note:
XMLType
stored as CLOB
is deprecated as of Oracle Database 12c Release 1 (12.1).XMLType
tables and columns are now stored as binary XML. clob已经被废弃成不是默认。
xml文件如下:
<?xml version="1.0" encoding="utf-8"?>
<client name="iphone" version="2.5.1.0" versiontype="0000">
<user account="18888888888">
<events count_date="2012-08-19">
<event network="WiFi" count="5" durations="20">time_reg</event>
<event network="WiFi" count="9">send_soundim</event>
</events>
<events count_date="2012-08-18">
<event network="WiFi" count="4">send_sys_expression</event>
<event network="WiFi" count="5">click_accessory</event>
</events>
</user>
<user account=" noaccount">
<events count_date="2012-08-19">
<event network="CMCC3G" count="2">click_reg_next</event>
</events>
</user>
</client>
自己写的解析代码第一种如下:
select /*+ NO_XML_QUERY_REWRITE */ t.name,
t.version,
t.versiontype,
t.account,
t.network,
t.count_date,
t.cnt,
t.duration,
t.action
from
(SELECT XMLQuery('//client' PASSING res RETURNING CONTENT) OBJECT_VALUE
FROM xdb.resource_view r where any_path like '%client.xml'
and under_path(RES, '/home/SCOTT/demonstrations/introduction') = 1)p,
XMLTable('for $i in /client
let $a:=$i/@name
let $b:=$i/@version
let $c:=$i/@versiontype
let $d:=$i/user
for $j in $i/user
let $e:=$j/@account
for $x in $i/user/events
let $f:= $x/@count_date
for $y in $i/user/events/event
let $g:= $y/@network
let $h:= $y/@count
let $k:= $y/@duration
let $l:=$y/text()
return <client name="{$a}" version="{$b}" versiontype="{$c}" account="{$e}" count_date="{$f}" network="{$g}" count="{$h}" duration="{$k}" action="{$l}" >
</client> ' PASSING p.OBJECT_VALUE
COLUMNS name VARCHAR2(20) PATH '@name',
version VARCHAR2(24) PATH '@version',
versiontype VARCHAR2(16) PATH '@versiontype',
account VARCHAR2(50) PATH '@account',
network VARCHAR2(20) PATH '@network',
count_date VARCHAR2(48) PATH '@count_date',
cnt number(10) PATH '@count',
duration number(10) PATH '@duration',
action VARCHAR2(50) PATH '@action'
) t;
第二种:
CREATE OR REPLACE PROCEDURE sp_xmlparser(datestr IN VARCHAR2)
AS
/* datestr format 'yyyymmdd'
*
* edited by H.S vsersion 1.0
*/
-------------------------------------------------
--以下变量用于获取XML节点的值,对应表中的字段值
--xmlClobData CLOB;
client VARCHAR2(20);
version VARCHAR2(24);
versionType VARCHAR2(16);
account VARCHAR2(50);
network VARCHAR2(20);
action VARCHAR2(50);
count_date VARCHAR2(20);
cnt INTEGER;
duration INTEGER;
------------------------------------------------
--步骤1:定义或确定要解析的XML规则
--步骤2:创建XML解析器实例XMLPARSER.parser,如下:
xmlPar XMLPARSER.parser := XMLPARSER.NEWPARSER;
--步骤3:定义DOM文档对象,如下:
doc xmldom.DOMDocument;
--步骤4:定义解析XML所需要的其他对象,如下:
lenUser INTEGER;
eventItem INTEGER;
networkcount INTEGER;
eventlen INTEGER;
tempNode_event xmldom.DOMNode;
eventItem xmldom.DOMNode;
clientnode xmldom.DOMNode;
usernode xmldom.DOMNode;
eventnetwork_node xmldom.DOMNode;
unitNodes xmldom.DOMNodeList;
eventNodes xmldom.DOMNodeList;
userNodes xmldom.DOMNodeList;
eventnetwork_Nodes xmldom.DOMNodeList;
tempArrMap xmldom.DOMNamedNodeMap;
tempArrMap2 xmldom.DOMNamedNodeMap;
tempArrMap3 xmldom.DOMNamedNodeMap;
tempArrMap4 xmldom.DOMNamedNodeMap;
CURSOR cur IS SELECT xmlstring FROM xml_test WHERE loadtime=datestr;
v_xmlstring xml_test.xmlstring%TYPE;
BEGIN
OPEN cur;
LOOP
FETCH cur INTO v_xmlstring;
EXIT WHEN cur%NOTFOUND;
--步骤5:获取xml数据,以下假设从数据表的clob字段中获取取,如下:
-- SELECT xmlstring INTO xmlClobData FROM xml_test;
--步骤6:解析xml数据,如下
xmlPar := xmlparser.newParser;
xmlparser.parseClob(xmlPar,v_xmlstring);
doc := xmlparser.getDocument(xmlPar);
-- 释放解析器实例
xmlparser.freeParser(xmlPar);
-- 获取所有client元素
unitNodes := xmldom.getElementsByTagName(doc,'client');
-- 遍历clinet 中的 name=”iphone” version="2.5.1.0" versiontype="0000"属性
clientNode := xmldom.item(unitNodes,0);
tempArrMap := xmldom.getAttributes(clientnode);
client := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'name'));
version := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'version'));
versionType := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'versiontype'));
--DBMS_OUTPUT.PUT_LINE('client'||lenUnit);
--user account子节点个数
userNodes := xmldom.getChildNodes(clientNode);
lenUser := xmldom.getLength(userNodes);
--DBMS_output.Put_Line('lenuser'||lenUser);
--第i个user account子节点
FOR i in 0..lenUser-1
LOOP
--获取第i个user account子节点元素值
usernode:=xmldom.item(userNodes,i);
tempArrMap2:=xmldom.getAttributes(usernode);
account:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap2,'account'));
--event子节点个数
eventNodes:= xmldom.getChildNodes(usernode);
eventlen:=xmldom.getLength(eventNodes);
--DBMS_output.Put_Line('eventlen'||eventlen);
FOR j in 0..eventlen-1
LOOP
--获取j个event子节点
tempNode_event:=xmldom.item(eventNodes,j);
--获取第j个event子节点元素值
tempArrMap3 := xmldom.getAttributes(tempNode_event);
count_date := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap3,'count_date'));
--event network子节点个数
eventnetwork_Nodes:= xmldom.getChildNodes(tempNode_event);
networkcount := xmldom.getLength(eventnetwork_Nodes);
--DBMS_output.Put_Line('networkcount'||networkcount);
FOR k in 0.. networkcount-1
LOOP
eventnetwork_node:=xmldom.item(eventnetwork_Nodes,k);
tempArrMap4 := xmldom.getAttributes(eventnetwork_node);
--event network子节点元素值
network := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'network'));
cnt:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'count'));
duration:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'duration'));
action:= xmldom.getNodeValue(xmldom.getfirstChild(xmldom.item(eventnetwork_Nodes,0)));
--DBMS_output.Put_Line(account||' '||count_date||' '||client||' '||version||' '||versionType||' '||network||' '||cnt||' '||duration||' '||action);
INSERT INTO mobile_login(account,login_date,client,version,versiontype,network,cnt,duration,action)VALUES(account,count_date,client,version,versionType,network,cnt,duration,action);
END LOOP;
END LOOP;
END LOOP;
COMMIT;
END LOOP;
--步骤7:释放文档对象
xmldom.freeDocument(doc);
--步骤8:异常与错误处理
EXCEPTION
WHEN OTHERS THEN
DBMS_output.PUT_LINE(SQLERRM);
END;
再写个shell实现加载
#!/bin/sh
datestr=`date +%Y%m%d`
ls -l ./$datestr|awk '{ if(NR==1){} else print NR-1"|"'$datestr'"|./'$datestr'/"$9}'>xml_list.txt
echo "
load data
infile 'xml_list.txt'
append
into table SB_250.xml_test
fields terminated by '|'
trailing nullcols
(
file_id integer external,
loadtime char,
file_name FILLER char,
xmlstring LOBFILE(file_name) TERMINATED BY EOF
)">control.ctl
sqlldr oracle/oracle control=control.ctl
if [ $? -eq 0 ]; then
echo "$datestr:import successfully.">>xml_import.log
else
echo "$datestr:import failed.">>xml_import.log
fi