xml文件如下:
<?xml version="1.0" encoding="UTF-8"?> <articles> <article category="xml"> <title>xml概述</title> <author>janet</author> <email>janetvsfei@yahoo.com.cn</email> <date>20080801</date> </article> <article category="java"> <title>Java基本语法</title> <author>janet</author> <email>janetvsfei@yahoo.com.cn</email> <date>20080802</date> </article> </articles>
注意:xml文件的顶部<?xml version="1.0" encoding="UTF-8"?>这句话之前不能有任何空格,空行之类的,否则会出错。
1、用纯DOM来做。
用Element root=document.getDocumentElement()拿到根结点后,再逐层遍历它的子结点即可。
DOM的特点是将XML映射成一个Document,是一次性将所有XML全部载入到内存中。
例子如下:
package testXmlParse.dom;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Plain W3C DOM sample: loads the whole articles file into a {@link Document}
 * and prints every article's category followed by its fields.
 */
public class TestXml_DOM {

    /**
     * Child element names of an article that are printed. The element name is
     * also used as the label in front of the printed value.
     */
    private static final String[] PRINTED_FIELDS = {"title", "author", "email", "date"};

    /**
     * Parses the sample file and writes its content to standard output.
     * Any exception raised while parsing or walking the tree is printed as a
     * stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File xmlFile = new File("D:\\temp\\Bosch\\articles.xml");
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder builder = builderFactory.newDocumentBuilder();
            // Parsing builds the complete tree in memory.
            Document document = builder.parse(xmlFile);

            // Start from the root element and print its name.
            Element root = document.getDocumentElement();
            System.out.println(root.getNodeName());

            // Walk the direct children of the root, skipping everything that is not an article.
            NodeList topLevel = root.getChildNodes();
            for (int index = 0; index < topLevel.getLength(); index++) {
                Node candidate = topLevel.item(index);
                if (!"article".equals(candidate.getNodeName())) {
                    continue;
                }
                printArticle(candidate);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the category attribute of one article node and then each of its
     * child nodes whose name is listed in {@link #PRINTED_FIELDS}.
     */
    private static void printArticle(Node article) {
        Node category = article.getAttributes().getNamedItem("category");
        System.out.println("\r\n找到一本新书,书的分类是:" + category.getNodeValue() + ".");

        NodeList fields = article.getChildNodes();
        for (int pos = 0; pos < fields.getLength(); pos++) {
            Node field = fields.item(pos);
            String fieldName = field.getNodeName();
            if (isPrintedField(fieldName)) {
                System.out.println(fieldName + ":" + field.getTextContent());
            }
        }
    }

    /** Returns true when the given node name is one of the printed field names. */
    private static boolean isPrintedField(String nodeName) {
        for (int k = 0; k < PRINTED_FIELDS.length; k++) {
            if (PRINTED_FIELDS[k].equals(nodeName)) {
                return true;
            }
        }
        return false;
    }
}
2、用SAX来做:
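比纯DOM方便,也快得多:SAX是基于事件的流式解析,边读边回调,只把当前需要的内容放在内存中,不像DOM那样把整个文档全部载入内存。
注意:SAXParser.parse并不是只能以File为载体,它还接受InputStream和InputSource;要解析单纯的String,可以用new InputSource(new StringReader(xml))包装后再传给parse。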
package testXmlParse.sax;
import java.io.File;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
/**
 * Entry point of the SAX sample: feeds an XML file to a SAX parser and lets
 * {@link MySaxHandler} react to the parse events.
 */
public class XmlParse {

    /**
     * Obtains a SAX parser from the default factory and parses the sample
     * file with a new {@code MySaxHandler}. Any exception is printed as a
     * stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // NOTE(review): the other samples in this file read articles.xml;
        // confirm that onhand.xml is really the intended input here.
        final String xmlPath = "d:/temp/Bosch/onhand.xml";
        File input = new File(xmlPath);
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        try {
            SAXParser saxParser = parserFactory.newSAXParser();
            saxParser.parse(input, new MySaxHandler());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package testXmlParse.sax;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that prints, to standard output, the category of every
 * {@code article} element and the text of its {@code title}, {@code author},
 * {@code email}, {@code body} and {@code date} elements.
 *
 * <p>The handler keeps the text of the element currently being read in an
 * instance field, so one instance must not be shared between concurrent parses.
 */
public class MySaxHandler extends DefaultHandler {
    // Not referenced inside this class; kept in case code elsewhere in the
    // package uses it. SimpleDateFormat instances are not synchronized.
    static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Character data collected since the last element start or end. */
    private final StringBuilder content = new StringBuilder();

    /**
     * Appends a chunk of character data to the current text buffer.
     *
     * <p>A parser may report one run of text through several calls, so the
     * chunks are accumulated instead of replacing each other.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        content.append(ch, start, length);
    }

    /**
     * Prints the collected text when one of the known field elements ends,
     * then clears the buffer so that following whitespace or sibling text
     * cannot leak into the next element.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        String text = content.toString();
        content.setLength(0);
        if ("title".equals(name)) {
            System.out.println("标题:" + text);
        } else if ("author".equals(name)) {
            System.out.println("作者:" + text);
        } else if ("email".equals(name)) {
            System.out.println("电子邮件:" + text);
        } else if ("body".equals(name)) {
            System.out.println("内容:" + text);
        } else if ("date".equals(name)) {
            System.out.println("发表日期:" + text);
        }
    }

    /**
     * Starts a fresh text buffer for the new element and, for an
     * {@code article} element, prints its {@code category} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // Drop whitespace/text that preceded this start tag.
        content.setLength(0);
        if ("article".equals(name)) {
            System.out.println("\r\n找到一篇文章,所属分类:"
                    + attributes.getValue("category") + ". ");
        }
    }
}
3、DOM4J
package testXmlParse.dom4j;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.dom4j.Element;
/**
 * Helpers for the dom4j sample: reads an XML file into a string and converts
 * its {@code article} elements into maps of field name to trimmed text.
 *
 * @author germmy
 * @date 2012-12-11
 */
@SuppressWarnings("unchecked")
public class XmlUtil {
    // Log under this class's own category (was created for Dom4jParser by mistake).
    protected static Logger logger = Logger.getLogger(XmlUtil.class);

    /** Child element names that are extracted from every article. */
    public static final String[]NODENAMES={"title","author","email","date"};

    /**
     * Reads a file as UTF-8 text.
     *
     * <p>The content is returned as it is in the file, including line
     * terminators, so text that spans several lines is not fused together.
     * A leading byte-order mark is removed because an XML declaration must be
     * the very first thing in the document.
     *
     * @param fileName path of the file to read
     * @return the text that could be read; if an I/O error occurs it is
     *         logged and whatever was read up to that point (possibly an
     *         empty string) is returned
     */
    public static String readFile_encoding(String fileName) {
        String encoding = "UTF-8"; // character encoding of the file
        File file = new File(fileName);
        StringBuilder sb = new StringBuilder();
        FileInputStream stream = null;
        BufferedReader reader = null;
        try {
            stream = new FileInputStream(file);
            reader = new BufferedReader(new InputStreamReader(stream, encoding));
            // Copy raw characters instead of using readLine(), which would
            // silently discard every line terminator.
            char[] buffer = new char[4096];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, count);
            }
        } catch (IOException e) {
            logger.error("failed to read file[" + fileName + "]", e);
        } finally {
            try {
                // Closing the reader also closes the underlying stream; fall
                // back to the stream if the reader was never created.
                if (reader != null) {
                    reader.close();
                } else if (stream != null) {
                    stream.close();
                }
            } catch (IOException e1) {
                logger.debug("failed to close file[" + fileName + "]", e1);
            }
        }
        // The UTF-8 decoder keeps a byte-order mark as U+FEFF; drop it.
        if (sb.length() > 0 && sb.charAt(0) == '\uFEFF') {
            sb.deleteCharAt(0);
        }
        return sb.toString();
    }

    /**
     * Extracts one map per {@code article} element found directly under the
     * root of the given XML text. Each map is filled by
     * {@code Dom4jParser.parseText} for every name in {@link #NODENAMES}.
     *
     * @param xml XML document text
     * @return a list of maps (empty when no article elements were returned),
     *         or {@code null} if an exception occurred while extracting
     */
    public static List getOrderInfoList(String xml){
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();
        try {
            List articles = Dom4jParser.getElements(xml,"article");// all article nodes
            if(articles!=null){
                logger.debug("article.length["+articles.size()+"]");
                for (int i = 0; i < articles.size(); i++) {
                    Element element = (Element) articles.get(i);
                    Map<String, String> map = new HashMap<String, String>();
                    for (String nodeName : NODENAMES) {
                        Dom4jParser.parseText(map, element, nodeName);
                    }
                    list.add(map);
                }
            }
        } catch (Exception e) {
            logger.error("",e);
            // Callers distinguish failure from "no articles" by the null result.
            return null;
        }
        return list;
    }

    /**
     * Runs the sample against the articles file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String xml=readFile_encoding("d:/temp/Bosch/articles.xml");
        XmlUtil.getOrderInfoList(xml);
    }
}
package testXmlParse.dom4j;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
/**
 * Static helpers around dom4j for parsing XML text and reading values from
 * the resulting tree.
 *
 * @author germmy
 * @date 2012-12-11
 */
@SuppressWarnings("unchecked")
public class Dom4jParser {
    protected static Logger logger = Logger.getLogger(Dom4jParser.class);

    /**
     * Parses XML text into a dom4j document.
     *
     * @param xml XML document text
     * @return the parsed document, or {@code null} when dom4j raises a
     *         {@code DocumentException} (which is logged)
     */
    public static Document getDom4JDocument(String xml){
        try {
            return DocumentHelper.parseText(xml);
        } catch (DocumentException e) {
            logger.error("",e);
            return null;
        }
    }

    /**
     * Parses XML text and returns its root element.
     *
     * @param xml XML document text
     * @return the root element, or {@code null} when no document was produced
     */
    public static Element getRoot(String xml){
        Document parsed = getDom4JDocument(xml);
        return parsed == null ? null : parsed.getRootElement();
    }

    /**
     * Parses XML text and returns the root's direct child elements with the
     * given name.
     *
     * @param xml      XML document text
     * @param nodeName name of the child elements to collect
     * @return the matching elements, or {@code null} when there is no
     *         document or no root element (a debug message is logged then)
     */
    public static List getElements(String xml,String nodeName){
        Element rootElement = getRoot(xml);
        List matches = null;
        if (rootElement != null) {
            matches = rootElement.elements(nodeName);
        }
        if (matches == null) {
            logger.debug("can't find elements["+nodeName+"] in xml");
        }
        return matches;
    }

    /**
     * Looks up the first child of {@code element} named {@code nodeName},
     * prints its name and trimmed text, and stores the text in {@code map}
     * under that name. When there is no such child only a debug message is
     * logged and the map is left untouched.
     *
     * @param map      target map receiving {@code nodeName -> trimmed text}
     * @param element  parent element to search in
     * @param nodeName name of the child element to read
     */
    public static void parseText(Map map,Element element,String nodeName){
        Element child = element.element(nodeName);
        if (child == null) {
            logger.debug("can't find Element["+nodeName+"] in xml");
            return;
        }
        System.out.println("nodeName:"+nodeName);
        String trimmed = child.getTextTrim();
        System.out.println("value:"+trimmed);
        map.put(nodeName, trimmed);
    }
}
4、SAXReader,以前写过,待整理。
refurl:http://developer.51cto.com/art/200903/117512.htm xml解析的4个方法