xml解析方法-CSDN博客

本文链接：https://blog.csdn.net/phoenix_cat/article/details/84366761

xml文件如下：

<?xml version="1.0" encoding="UTF-8"?>
<articles>
	<article category="xml">
		<title>xml概述</title>
		<author>janet</author>
		<email>janetvsfei@yahoo.com.cn</email>
		<date>20080801</date>
	</article>
	<article category="java">
		<title>Java基本语法</title>
		<author>janet</author>
		<email>janetvsfei@yahoo.com.cn</email>
		<date>20080802</date>
	</article>
</articles>

注意：XML文件顶部的<?xml version="1.0" encoding="UTF-8"?>声明之前不能有任何空格、空行之类的字符，否则解析会出错。

1、用纯DOM来做。

用Element root=document.getDocumentElement()拿到根节点后，不停地遍历其子节点即可。

DOM的特点是将XML映射成一个Document，是一次性将所有XML全部载入到内存中。

例子如下：

package testXmlParse.dom;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;


/**
 * DOM example: parses an articles XML file with the W3C DOM API and prints
 * the root element name followed by the category, title, author, email and
 * date of every {@code article} element found directly under the root.
 */
public class TestXml_DOM {

	/** File parsed when no path is given on the command line. */
	private static final String DEFAULT_XML_PATH = "D:\\temp\\Bosch\\articles.xml";

	/** Child elements of an article whose text is printed. */
	private static final String[] PRINTED_FIELDS = {"title", "author", "email", "date"};

	/**
	 * Parses the XML file and prints its articles to standard output.
	 * Any failure (missing file, malformed XML, ...) is reported on standard
	 * error via a stack trace; the method itself never throws.
	 *
	 * @param args optional; when present, {@code args[0]} is the path of the
	 *             XML file to parse instead of the built-in default
	 */
	public static void main(String[] args) {

		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
		File file = new File(path);

		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

		try {
			DocumentBuilder db = dbf.newDocumentBuilder();

			// start parsing the file
			Document document = db.parse(file);

			// obtain the root element
			Element root = document.getDocumentElement();
			System.out.println(root.getNodeName());

			// walk the direct children of the root
			NodeList nodeList = root.getChildNodes();
			for (int i = 0; i < nodeList.getLength(); i++) {
				Node node = nodeList.item(i);
				// only real elements carry attributes; text/comment nodes are skipped
				if (node.getNodeType() == Node.ELEMENT_NODE
						&& "article".equals(node.getNodeName())) {
					printArticle((Element) node);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints the category of one article followed by its known child fields.
	 *
	 * @param article the {@code article} element to print
	 */
	private static void printArticle(Element article) {
		// getAttribute yields "" for an absent attribute, so an article
		// without a category no longer aborts the traversal with an NPE
		String category = article.getAttribute("category");
		System.out.println("\r\n找到一本新书,书的分类是:" + category + ".");

		NodeList children = article.getChildNodes();
		for (int j = 0; j < children.getLength(); j++) {
			Node child = children.item(j);
			String childName = child.getNodeName();
			for (int k = 0; k < PRINTED_FIELDS.length; k++) {
				if (PRINTED_FIELDS[k].equals(childName)) {
					System.out.println(childName + ":" + child.getTextContent());
					break;
				}
			}
		}
	}
}

2、用SAX来做：

比纯DOM方便，也快得多：SAX按事件流式解析，只把当前需要处理的内容载入内存，不像DOM那样把整个文档全部载入内存。

最初以为缺点是只能以File为载体、单纯的String不能解析；实际上SAXParser.parse还有接受InputStream和InputSource的重载，把字符串包装成new InputSource(new StringReader(xml))后即可解析。

package testXmlParse.sax;

import java.io.File;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/**
 * SAX example entry point: parses an XML file and forwards the parsing
 * events to a {@link MySaxHandler}.
 */
public class XmlParse {

	/** File parsed when no path is given on the command line. */
	private static final String DEFAULT_XML_PATH = "d:/temp/Bosch/onhand.xml";

	/**
	 * Runs the SAX parser over the XML file. Any failure is reported on
	 * standard error via a stack trace; the method itself never throws.
	 *
	 * @param args optional; when present, {@code args[0]} is the path of the
	 *             XML file to parse instead of the built-in default
	 */
	public static void main(String[] args) {

		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
		File xmlFile = new File(path);

		SAXParserFactory factory = SAXParserFactory.newInstance();
		try {
			SAXParser parser = factory.newSAXParser();
			parser.parse(xmlFile, new MySaxHandler());
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

package testXmlParse.sax;

import java.text.DateFormat;
import java.text.SimpleDateFormat;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that prints each article's category when the {@code article}
 * element opens, and the text of the {@code title}, {@code author},
 * {@code email}, {@code body} and {@code date} elements when they close.
 */
public class MySaxHandler extends DefaultHandler {
	// Not referenced by this handler; left in place because it is
	// package-visible and other code may rely on it.
	static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

	/** Text received since the most recent start or end tag. */
	private final StringBuilder content = new StringBuilder();

	/**
	 * Collects character data. A parser may deliver the text of a single
	 * element in several calls, so the chunks are appended rather than
	 * overwriting each other.
	 */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		content.append(ch, start, length);
	}

	/**
	 * Prints the collected text for the known leaf elements and clears the
	 * buffer so the next element starts empty.
	 */
	@Override
	public void endElement(String uri, String localName, String name)
			throws SAXException {
		String text = content.toString();
		content.setLength(0);

		if ("title".equals(name)) {
			System.out.println("标题：" + text);
		} else if ("author".equals(name)) {
			System.out.println("作者：" + text);
		} else if ("email".equals(name)) {
			System.out.println("电子邮件：" + text);
		} else if ("body".equals(name)) {
			System.out.println("内容：" + text);
		} else if ("date".equals(name)) {
			System.out.println("发表日期：" + text);
		}
	}

	/**
	 * Discards text that preceded this tag (e.g. indentation between
	 * elements) and announces every {@code article} with its category.
	 */
	@Override
	public void startElement(String uri, String localName, String name,
			Attributes attributes) throws SAXException {
		content.setLength(0);

		if ("article".equals(name)) {
			System.out.println("\r\n找到一篇文章，所属分类："
					+ attributes.getValue("category")+". ");
		}
	}

}

3、DOM4J

package testXmlParse.dom4j;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.dom4j.Element;


/**
 * dom4j example helpers: reads an XML file into a string and converts its
 * {@code article} elements into a list of maps.
 *
 * @author germmy
 * @date 2012-12-11
 */
@SuppressWarnings("unchecked")
public class XmlUtil {
	// bound to this class so log output is attributed to XmlUtil
	protected static Logger logger = Logger.getLogger(XmlUtil.class);

	/** Names of the child elements copied out of every article. */
	public static final String[]NODENAMES={"title","author","email","date"};
	
	/**
	 * Reads a whole file as UTF-8 text.
	 * <p>
	 * The content is copied character by character, so line breaks are
	 * preserved and tokens that are split across lines in the file are not
	 * fused together.
	 * 
	 * @param fileName path of the file to read
	 * @return the file content; if an I/O error occurs it is logged and
	 *         whatever was read up to that point (possibly the empty string)
	 *         is returned
	 */
	public static String readFile_encoding(String fileName) {

		String encoding = "UTF-8"; // character encoding of the file
		File file = new File(fileName);

		BufferedReader reader = null;
		StringBuilder sb = new StringBuilder();
		try {
			reader = new BufferedReader(new InputStreamReader(
					new FileInputStream(file), encoding));
			char[] buffer = new char[4096];
			int count;
			while ((count = reader.read(buffer)) != -1) {
				sb.append(buffer, 0, count);
			}
		} catch (IOException e) {
			logger.error("failed to read file[" + fileName + "]", e);
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// nothing useful can be done when closing fails
				}
			}
		}
		return sb.toString();
	}
	
	/**
	 * Converts the {@code article} elements of an XML document into maps.
	 * 
	 * @param xml the XML document text
	 * @return one map per article, holding the trimmed text of each
	 *         {@link #NODENAMES} child that is present; an empty list when no
	 *         article elements could be obtained; {@code null} when an
	 *         exception occurred while processing
	 */
	public static List getOrderInfoList(String xml){
		List list=new ArrayList();
		try {
			List articles = Dom4jParser.getElements(xml,"article");// all article elements under the root
			if(articles!=null){
				logger.debug("article.length["+articles.size()+"]");
				for (int i = 0; i < articles.size(); i++) {
					Element element = (Element) articles.get(i);
					Map map=new HashMap();
					for(int j=0;j<NODENAMES.length;j++){
						Dom4jParser.parseText(map,element,NODENAMES[j]);
					}
					list.add(map);
				}
			}
		} catch (Exception e) {
			// callers receive null as the failure signal
			list=null;
			logger.error("failed to extract article list",e);
		}
		return list;
	}
	
	/**
	 * Reads the sample file and runs it through {@link #getOrderInfoList(String)}.
	 * 
	 * @param args unused
	 */
	public static void main(String[] args) {
		String xml=readFile_encoding("d:/temp/Bosch/articles.xml");
		XmlUtil.getOrderInfoList(xml);
	}
}

package testXmlParse.dom4j;

import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

/**
 * Static dom4j helpers used by the example: parse XML text, reach the root
 * element, list the root's children by name and copy a child's text into a map.
 *
 * @author germmy
 * @date 2012-12-11
 */
@SuppressWarnings("unchecked")
public class Dom4jParser {
	protected static Logger logger = Logger.getLogger(Dom4jParser.class);

	/**
	 * Parses XML text into a dom4j document.
	 *
	 * @param xml the XML text
	 * @return the parsed document, or {@code null} when parsing fails with a
	 *         {@link DocumentException} (the failure is logged)
	 */
	public static Document getDom4JDocument(String xml){
		try {
			return DocumentHelper.parseText(xml);
		} catch (DocumentException e) {
			logger.error("",e);
			return null;
		}
	}

	/**
	 * Returns the root element of the given XML text.
	 *
	 * @param xml the XML text
	 * @return the root element, or {@code null} when the text could not be parsed
	 */
	public static Element getRoot(String xml){
		Document parsed = getDom4JDocument(xml);
		return parsed == null ? null : parsed.getRootElement();
	}

	/**
	 * Returns the direct children of the root element that carry the given name.
	 *
	 * @param xml      the XML text
	 * @param nodeName name of the child elements to collect
	 * @return the matching elements, or {@code null} when no document or root
	 *         element is available (a debug message is logged in that case)
	 */
	public static List getElements(String xml,String nodeName){
		Element root = getRoot(xml);
		List found = (root == null) ? null : root.elements(nodeName);
		if (found == null) {
			logger.debug("can't find elements["+nodeName+"] in xml");
		}
		return found;
	}

	/**
	 * Looks up the named child of {@code element}; when present, prints its
	 * name and trimmed text and stores the text in {@code map} under that name.
	 * When absent, only a debug message is logged.
	 *
	 * @param map      receives the value keyed by {@code nodeName}
	 * @param element  parent element to search
	 * @param nodeName name of the child element to read
	 */
	public static void parseText(Map map,Element element,String nodeName){
		Element child = element.element(nodeName);
		if (child == null) {
			logger.debug("can't find Element["+nodeName+"] in xml");
			return;
		}
		System.out.println("nodeName:"+nodeName);
		String value = child.getTextTrim();
		System.out.println("value:"+value);
		map.put(nodeName, value);
	}
}

4、SAXReader，以前写过，待整理。

refurl:http://developer.51cto.com/art/200903/117512.htm xml解析的4个方法