JAVA原生API读取XML大文件的DOM方式和SAX方式比较

JAVA原生API读取XML大文件的DOM方式和SAX方式比较

flyflyfly55555

分类专栏: J2SE综合 文章标签: XMLJavaEclipseJDKApache

一直都在使用DOM方式读取XML文件,但对于稍大一点的XML文件,DOM方式就不太适合了。

研究了下jdk的api,用dom和sax方式的解析结果做了个对比

要解析的xml内容格式如下

<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>商品链接访问地址</loc>
<data>
<display>
<title>商品名称</title>
<price>价格</price>
<image>
商品图片访问地址
</image>
<description>商品描述</description>
<barCode>条形码值</barCode>
<area>产地 (北京)</area>
<producedate>生产日期 (2011-11-11)</producedate>
<manufacturers>生产厂家  (某某某)</manufacturers>
</display>
</data>
</url>
<!-- ..... 更多 url 节点 -->
</urlset>

 

xml文件大小16.5M

 

首先是dom方式读取,代码如下

 
  1. package test.xml;

  2.  
  3. import java.util.ArrayList;

  4. import java.util.HashMap;

  5. import java.util.List;

  6. import java.util.Map;

  7. import java.util.Set;

  8.  
  9. import javax.xml.parsers.DocumentBuilderFactory;

  10.  
  11. import org.w3c.dom.Document;

  12. import org.w3c.dom.Element;

  13. import org.w3c.dom.Node;

  14. import org.w3c.dom.NodeList;

  15.  
  /**
   * Reads an XML file with the JDK DOM parser and hands the parsed
   * {@code <url>} entries to {@link #doSomeThing()} in batches of
   * {@code statmentSize} entries.
   */
  public class JDKBigXmlDomParse {

      /** Number of entries collected before a batch is handed to {@link #doSomeThing()}. */
      private int statmentSize = 6;

      /** Entries of the batch currently being collected. */
      private List<Map<String, Object>> dataList = new ArrayList<Map<String, Object>>(statmentSize);

      /**
       * Parses {@code f:\test.xml} into a DOM tree and processes every
       * {@code <url>} element it contains.
       *
       * @throws Exception if the parser cannot be created or the file cannot be read or parsed
       */
      public void test() throws Exception {
          String uri = "f:\\test.xml";
          Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(uri);
          processDocument(doc);
      }

      /**
       * Converts every {@code <url>} element of the document into a map and
       * processes the maps in batches, including the final partial batch.
       *
       * @param doc the parsed document
       */
      private void processDocument(Document doc) {
          NodeList urls = doc.getElementsByTagName("url");
          int length = urls.getLength();
          for (int i = 0; i < length; i++) {
              Node node = urls.item(i);
              if (node.getNodeType() != Node.ELEMENT_NODE) {
                  continue;
              }
              Map<String, Object> entry = parseEntity((Element) node);
              if (!entry.isEmpty()) {
                  dataList.add(entry);
                  if (dataList.size() == statmentSize) {
                      doSomeThing();
                  }
              }
          }
          // The number of entries is rarely a multiple of the batch size:
          // without this flush the last entries would never be processed.
          if (!dataList.isEmpty()) {
              doSomeThing();
          }
      }

      /**
       * Extracts the known fields of one {@code <url>} element.
       *
       * @param element the {@code <url>} element
       * @return a map from field name to trimmed text; fields that are missing
       *         or empty map to the empty string
       */
      private Map<String, Object> parseEntity(Element element) {
          String[] fields = {
              "loc", "title", "price", "image", "description",
              "barCode", "area", "producedate", "manufacturers"
          };
          Map<String, Object> map = new HashMap<String, Object>();
          for (String field : fields) {
              map.put(field, getElementValueByTagName(element, field));
          }
          return map;
      }

      /**
       * Returns the trimmed text of the first descendant element with the given tag name.
       *
       * @param element the element to search in
       * @param tagName the tag name to look for
       * @return the trimmed text, or the empty string if there is no such
       *         descendant or it has no text
       */
      private String getElementValueByTagName(Element element, String tagName) {
          NodeList nodeList = element.getElementsByTagName(tagName);
          if (nodeList.getLength() == 0) {
              return "";
          }
          // getTextContent() is used instead of getFirstChild().getNodeValue():
          // an empty element such as <title/> has no first child, which would
          // cause a NullPointerException.
          String text = nodeList.item(0).getTextContent();
          return text == null ? "" : text.trim();
      }

      /** Processes the current batch and then empties it. */
      private void doSomeThing() {
          //printMapList(dataList);
          dataList.clear();
      }

      /**
       * Prints every map on its own line in a JSON-like form
       * ({@code {"key":"value",...}}), followed by a final line break.
       *
       * @param dataList the maps to print
       */
      private void printMapList(List<Map<String, Object>> dataList) {
          for (Map<String, Object> map : dataList) {
              System.out.println();
              System.out.print("{");
              boolean first = true;
              for (Map.Entry<String, Object> entry : map.entrySet()) {
                  if (!first) {
                      System.out.print(",");
                  }
                  System.out.print("\"" + entry.getKey() + "\":");
                  System.out.print("\"" + entry.getValue() + "\"");
                  first = false;
              }
              System.out.print("}");
          }
          System.out.println();
      }

      /**
       * Runs {@link #test()} once and prints the elapsed wall-clock time in seconds.
       *
       * @param args ignored
       * @throws Exception if parsing fails
       */
      public static void main(String[] args) throws Exception {
          long start = System.nanoTime();
          new JDKBigXmlDomParse().test();
          long end = System.nanoTime();
          System.out.println("耗时:" + (end - start) / 1000000000.0 + "秒");
      }

  }

 运行结果:

耗时:3.212168172秒

 

sax方式读取,代码如下:

 
  1. package test.xml;

  2.  
  3. import java.io.FileInputStream;

  4. import java.io.InputStream;

  5. import java.util.ArrayList;

  6. import java.util.HashMap;

  7. import java.util.List;

  8. import java.util.Map;

  9. import java.util.Set;

  10.  
  11. import javax.xml.parsers.SAXParser;

  12. import javax.xml.parsers.SAXParserFactory;

  13.  
  14. import org.apache.commons.lang.StringUtils;

  15. import org.xml.sax.Attributes;

  16. import org.xml.sax.SAXException;

  17. import org.xml.sax.helpers.DefaultHandler;

  18.  
  /**
   * Reads an XML file with the JDK SAX parser. Each {@code <url>} element is
   * collected into a map of tag name to text, and the maps are handed to
   * {@link #doSomeThing()} in batches of {@code statmentSize} entries.
   */
  public class JDKBigXmlSaxParse extends DefaultHandler {

      /** Number of entries collected before a batch is handed to {@link #doSomeThing()}. */
      private int statmentSize = 6;

      /** Entries of the batch currently being collected. */
      private List<Map<String, Object>> dataList = new ArrayList<Map<String, Object>>(statmentSize);

      /** Map of the {@code <url>} element being read; {@code null} outside of one. */
      private Map<String, Object> dataMap;

      /** Text seen since the most recent start or end tag. */
      private final StringBuilder text = new StringBuilder();

      /**
       * Parses {@code f:\test.xml} with this object as the SAX handler.
       *
       * @throws Exception if the parser cannot be created or the file cannot be read or parsed
       */
      public void test() throws Exception {
          SAXParser sax = SAXParserFactory.newInstance().newSAXParser();
          InputStream in = new FileInputStream("f:\\test.xml");
          try {
              sax.parse(in, this);
          } finally {
              // Close the stream even when parsing fails.
              in.close();
          }
      }

      /**
       * Buffers character data. A parser may deliver the text of one element
       * in several calls, so the chunks are appended here and only stored in
       * {@link #endElement}.
       */
      @Override
      public void characters(char[] ch, int start, int length) throws SAXException {
          if (dataMap != null) {
              text.append(ch, start, length);
          }
      }

      /**
       * Stores the buffered text under the closing tag's name, completes the
       * entry on {@code </url>}, and flushes the batch when it is full or
       * when {@code </urlset>} is reached.
       */
      @Override
      public void endElement(String uri, String localName, String qName)
              throws SAXException {
          boolean isUrl = "url".equals(qName);

          if (dataMap != null && !isUrl) {
              String value = text.toString().trim();
              // Elements with only whitespace (e.g. containers) are not recorded.
              if (value.length() != 0) {
                  dataMap.put(qName, value);
              }
          }
          // Text following this end tag belongs to the parent, not to this element.
          text.setLength(0);

          if (isUrl && dataMap != null) {
              dataList.add(dataMap);
              // Text outside of <url> must not leak into a finished entry.
              dataMap = null;
              if (dataList.size() == statmentSize) {
                  doSomeThing();
                  dataList.clear();
              }
          }

          if ("urlset".equals(qName) && !dataList.isEmpty()) {
              doSomeThing();
              dataList.clear();
          }
      }

      /**
       * Starts a new entry on {@code <url>} and discards any text buffered
       * before the start tag.
       */
      @Override
      public void startElement(String uri, String localName, String qName,
              Attributes attributes) throws SAXException {
          text.setLength(0);
          if ("url".equals(qName)) {
              dataMap = new HashMap<String, Object>();
          }
      }

      /**
       * Runs {@link #test()} once and prints the elapsed wall-clock time in seconds.
       *
       * @param args ignored
       * @throws Exception if parsing fails
       */
      public static void main(String[] args) throws Exception {
          long start = System.nanoTime();
          new JDKBigXmlSaxParse().test();
          long end = System.nanoTime();
          System.out.println("耗时:" + (end - start) / 1000000000.0 + "秒");
      }

      /** Processes the current batch; the caller empties the batch afterwards. */
      public void doSomeThing() {
          //printMapList(dataList);
      }

      /**
       * Prints every map on its own line in a JSON-like form
       * ({@code {"key":"value",...}}), followed by a final line break.
       *
       * @param dataList the maps to print
       */
      private void printMapList(List<Map<String, Object>> dataList) {
          for (Map<String, Object> map : dataList) {
              System.out.println();
              System.out.print("{");
              boolean first = true;
              for (Map.Entry<String, Object> entry : map.entrySet()) {
                  if (!first) {
                      System.out.print(",");
                  }
                  System.out.print("\"" + entry.getKey() + "\":");
                  System.out.print("\"" + entry.getValue() + "\"");
                  first = false;
              }
              System.out.print("}");
          }
          System.out.println();
      }

  }

 运行结果:

耗时:0.639864769秒

 

可以看到DOM方式消耗的时间约是SAX方式的5倍。结论:如果只是读取XML文件,还是SAX方式更快。

 

而且在Eclipse里用DOM方式运行时,可能会出现 java.lang.OutOfMemoryError: Java heap space 错误,因为DOM方式会把整个文档树一次性加载到内存中。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值