DOM4J vs SAX

网上都说DOM4J加载大型的xml开销很大, 这里来做一个实验


package xml.dom4j;

/**
 * Small experiment that loads an XML file with dom4j and prints JVM heap
 * figures before and after the document is built, then walks every
 * {@code <po>} element and its {@code <field>} children.
 */
public class Test {

    /** Number of bytes in one megabyte, used to print heap figures in "M". */
    private static final long BYTES_PER_MB = 1024L * 1024L;

    public static void main(String[] args) {
        // Figures noted by the original author:
        // operlog.xml  - 100,000 lines, 4 MB: about 11M of JVM memory consumed
        // operlog2.xml - 100 lines, 5 KB: about 0M of JVM memory consumed
        Test.loadLogXML("C:\\WKP_HP1\\TestCaseHp1\\src\\operlog2.xml");
    }

    /**
     * Loads the given XML file into a dom4j document, printing heap usage
     * before and after the load, and then visits every {@code po} element and
     * its {@code field} children.
     *
     * <p>A {@code DocumentException} while reading prints a message and
     * returns; any other exception is caught and printed as well, so this
     * method does not propagate failures to the caller.
     *
     * @param file path of the XML file to load
     */
    public static void loadLogXML(String file) {
        Runtime runtime = Runtime.getRuntime();
        System.out.println("当前虚拟机最大可用内存为: " + runtime.maxMemory() / BYTES_PER_MB + "M");
        System.out.println("当前虚拟机已占用内存: " + usedMemoryMb(runtime) + "M");

        try {
            SAXReader reader = new SAXReader();
            Document doc;
            try {
                doc = reader.read(new File(file));
            } catch (DocumentException e) {
                System.out.println("读取配置文件错误");
                return;
            }
            // Second sample, taken once the whole tree is held in memory.
            System.out.println("当前虚拟机已占用内存: " + usedMemoryMb(runtime) + "M");

            Element root = doc.getRootElement();
            // Iterating as Object keeps this loop valid whether elements()
            // is declared as a raw List or as List<Element>.
            for (Object poNode : root.elements("po")) {
                Element po = (Element) poNode;

                // The values are read only to exercise attribute access on
                // every node; the experiment does not use them further.
                String beanName = po.attributeValue("name");
                String moduleName = po.attributeValue("moduleName");
                String sysId = po.attributeValue("sysId");

                for (Object fieldNode : po.elements("field")) {
                    Element field = (Element) fieldNode;
                    String fieldName = field.attributeValue("name");
                    String cnName = field.getText();
                }
            }
        } catch (Exception e) {
            System.out.println("加载日志配置文件时,碰到运行时异常: " + e.toString());
        }
    }

    /**
     * Returns the heap currently in use, in whole megabytes.
     *
     * <p>{@code totalMemory()} alone is the heap the JVM has reserved, which
     * does not move when objects are allocated inside already-reserved space;
     * subtracting {@code freeMemory()} gives the part actually occupied.
     */
    private static long usedMemoryMb(Runtime runtime) {
        return (runtime.totalMemory() - runtime.freeMemory()) / BYTES_PER_MB;
    }
}


最后发现, 加载 10 万行数据的 XML 时, JVM 也只用了 11M 内存, 完全可以接受。


附xml文件


<doc>
<po name="cia.common.db.orm.po.TblBkeMchntUsrInf" moduleName="MMG" sysId="2">
<field name="usrId">用户ID</field>
<field name="mchntCd">所属机构号</field>
</po>
<po name="cia.common.db.orm.po.TblBkeMchntUsrpwdInf" moduleName="MMG" sysId="2">
<field name="mchntCd">商户号</field>
<field name="usrId">用户ID</field>
<field name="usrPwd">用户密码</field>
</po>

<!-- PPP机构信息管理 -->
<po name="cia.common.db.orm.po.TblBkePppInsInfo" moduleName="CIM" sysId="2">
<field name="insTp">机构类型</field>
<field name="pppInsCd">机构号</field>
<field name="insNm">机构名称</field>
</po>
<!-- 全卡bin管理 -->
<po name="cia.common.db.orm.po.TblBkeBin" moduleName="BIM" sysId="2">
<field name="cardBin">卡BIN</field>
<field name="panLen">卡号长度</field>
<field name="issInsCd">发卡机构代码</field>
<field name="issInsCnNm">发卡机构名称</field>
<field name="cardAttr">卡性质</field>
<field name="cardBrand">卡品牌</field>
<field name="cardCata">卡类别</field>

</po>
</doc>



DOM4J 用来加载系统的配置文件还是OK的。 但用来接收外系统的数据就不合适了
1, 因为数据量的问题
2, DOM4J 适合解析 tag name 固定的 XML, 处理下面这种 XML 就很吃力了。

<?xml version="1.0" encoding="utf-8"?>

<!--XML file generated by Excel Input Tool-->
<ConfigMessage xmlns="http://schemas.hp.com/CfM/ucmdb/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schemas.hp.com/CfM/ucmdb/ ConfigMessage.xsd">
<ConfigMessageHeader>
<cmdbContext>cis</cmdbContext>
<dataSource>CIS DERIVED VALUE - will be a valid MDM Company Code</dataSource>
<dataSourceContact/>
<dataSourceContactEmail>CPLUS001@cplus.com</dataSourceContactEmail>
<simulate>false</simulate>
</ConfigMessageHeader>

<CIsAndRelations>
<CIsForUpdate>
<CI>
<netdevice>
<data_adminstate>0</data_adminstate>
<data_externalid>CPLUS_consoleswitch_002</data_externalid>
<data_note>ConsoleSwitch_Note</data_note>
<data_operationstate>0</data_operationstate>
<data_origin>ESL</data_origin>
<description>Description_ConsoleSwitch</description>
<document_list>DocumentsDocumentsDocumentsDocumentsDocumentsDocum</document_list>
<name>ConsoleSwitch_Name_Judy_002</name>
<TenantOwner>ACT</TenantOwner>
<TenantsUses>CPLUS</TenantsUses>
<user_label>ConsoleSwitch_UserLable_Judy_002</user_label>
<related_ciexternalid></related_ciexternalid>
<related_cit></related_cit>
<hp_envrmt_type_nm>Pending</hp_envrmt_type_nm>
<hp_user_defined_attribute1_txt>Attribute1</hp_user_defined_attribute1_txt>
<hp_user_defined_attribute2_txt>Attribute2</hp_user_defined_attribute2_txt>
<hp_user_defined_attribute3_txt>Attribute3</hp_user_defined_attribute3_txt>
<hp_user_defined_attribute4_txt>Attribute4</hp_user_defined_attribute4_txt>
<hp_user_defined_attribute5_txt>Attribute5</hp_user_defined_attribute5_txt>
<hp_regulatory_status_flg>true</hp_regulatory_status_flg>
<sacm_lifecycle_status>Pending</sacm_lifecycle_status>
<hp_inventory_id>InventoryId123</hp_inventory_id>
<codepage>CodePage</codepage>
<hp_cust_req_excpn_flg>true</hp_cust_req_excpn_flg>
<hp_location_txt>VIA|3F</hp_location_txt>
<hp_ci_alias_nm>AliasName123</hp_ci_alias_nm>
<bios_asset_tag>BiosAssetTag01</bios_asset_tag>
<bios_serial_number>N123456</bios_serial_number>
<bios_uuid>BiosUuid001</bios_uuid>
<calculated_location>CalculatedLocation001</calculated_location>
<domain_name>HP.com</domain_name>
<host_iscomplete>true</host_iscomplete>
<host_isdesktop>true</host_isdesktop>
<host_isroute>true</host_isroute>
<host_isvirtual>true</host_isvirtual>
<host_key>HostKey1234</host_key>
<host_nnm_uid>123456</host_nnm_uid>
<host_osaccuracy>HostOSA</host_osaccuracy>
<host_osrelease>HostOSRelease123</host_osrelease>
<host_servertype>HostServerType123</host_servertype>
<memory_size>1024</memory_size>
<net_bios_name>NetBiosName1213</net_bios_name>
<node_family>NodeFamily123</node_family>
<node_model>AIX 6.1</node_model>
<node_role>ConsoleSwitch</node_role>
<os_family>baremetal_hypervisor</os_family>
<primary_dns_name>PrimaryDnsName123</primary_dns_name>
<serial_number>SerialNo123</serial_number>
<swap_memory_size>100</swap_memory_size>
<sys_object_id>SysObjectId123</sys_object_id>
<vendor>IBM</vendor>
<hp_product_code_txt>ProductNo123</hp_product_code_txt>
<hp_approval_group_txt>StringVal456</hp_approval_group_txt>
<hp_approval_sequence_nbr>59</hp_approval_sequence_nbr>
</netdevice>
</CI>
</CIsForUpdate>
<relationsForUpdate>
</relationsForUpdate>
</CIsAndRelations>
</ConfigMessage>


这时候适合用SAX来解析


package com.hp.ucmdb.util;

import java.io.FileInputStream;
import java.io.IOException;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * SAX content handler that prints the local name of every element as it
 * opens and, when an element closes, the non-blank text collected since the
 * most recent start or end tag.
 */
public class ParseFileUtils extends DefaultHandler {

    public static void main(String[] args) throws SAXException, IOException {
        ParseFileUtils instance = new ParseFileUtils();
        instance.doParsing("c:\\222.xml");
    }

    /**
     * Parses the given XML file and delivers the SAX events to this handler.
     * No schema or DTD validation is configured here.
     *
     * @param fileName path of the XML file to parse
     * @throws SAXException if the document is not well-formed or the parser
     *     rejects one of the requested features
     * @throws IOException if the file cannot be opened, read or closed
     */
    public void doParsing(String fileName) throws SAXException, IOException {
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // Input may come from other systems: do not let a document pull in
        // external entities (local files, URLs) while it is being parsed.
        parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
        parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        parser.setContentHandler(this);

        // try-with-resources closes the stream on every path, including when
        // parse() throws.
        try (FileInputStream input = new FileInputStream(fileName)) {
            parser.parse(new InputSource(input));
        }
    }

    String v = "";
    // Text collected since the last start/end tag. Initialized eagerly so the
    // callbacks below never see null.
    StringBuffer sb = new StringBuffer();

    /** Prints the element's local name and starts a fresh text buffer. */
    @Override
    public void startElement(String namespace, String localName,
            String qualifiedName, Attributes atts) throws SAXException {
        sb = new StringBuffer();
        System.out.println(localName);
    }

    /**
     * Appends a chunk of character data to the buffer; the parser may deliver
     * one text node in several calls.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        sb.append(ch, start, length);
    }

    /**
     * Prints the buffered text unless it is empty or whitespace only, then
     * resets the buffer.
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName)
            throws SAXException {
        String strValue = sb.toString();
        if (!strValue.trim().isEmpty()) {
            System.out.println(" v: " + strValue);
        }
        sb = new StringBuffer();
    }

}



另外, DOM4J 和 SAX 都可以进行schema验证
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值