java中解析xml文件有四种方式,分别是DOM、SAX、JDOM、DOM4J,这四种前两种是系统自带的,后两种需要导入jar包,其中先要对xml文件有一个基本的了解。
xml文件的作用是在不同程序、不同平台之间传输数据,以及在不同数据平台之间共享数据。它是以树形结构来存储数据的。
<?xml version="1.0" encoding="UTF-8"?>
<bookstore>
<book id="1" size="medium">
<name >安徒生童话</name>
<price>89</price>
<language>英文</language>
<year>2004</year>
</book>
<book id="2" size="lower">
<name>一千零一夜</name>
<price>65</price>
<language>日语</language>
<year>2014</year>
</book>
<book id="3" size="large">
<name>美好</name>
<price>99</price>
<language>中文</language>
<year>2015</year>
</book>
</bookstore>
这就是一个简单的xml文件。第一行是xml声明,属于书写规范;其中指定了文件编码为UTF-8,也可以在这里改成GBK等其他编码。
后面的每一个节点都有相对应的结束节点。在<>中的id和size就是这个节点的属性,注意属性值需用引号(" ")括起来;它内部的节点可以理解为子节点。
DOM解析
节点类型 named constant nodeName()的返回值 nodeValue()的返回值
element ELEMENT_NODE element name null
Attr ATTRIBUTE_NODE 属性名称 属性值
text TEXT_NODE #text 节点内容
上面这个表还是挺重要的,不知道为什么要这样设置,反正挺烦人的还是得记住,尤其是在DOM解析时
先贴代码:
package project_xml;
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * DOM examples: {@link #xml_dom_parse()} prints the books found in
 * {@code book.xml}, and {@link #creat_xml()} writes a small document to
 * {@code book1.xml}.
 */
public class xml_dom {

    /**
     * Runs the parsing example.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        xml_dom xml_dom1 = new xml_dom();
        xml_dom1.xml_dom_parse();
        // Uncomment to also generate book1.xml.
        //xml_dom1.creat_xml();
    }

    /**
     * Parses {@code book.xml} and prints, for every {@code book} element, its
     * attributes followed by the text content of each child element.
     * Parser and I/O failures are reported with a stack trace.
     */
    public void xml_dom_parse() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document docu = db.parse("book.xml");
            // Collect every <book> element in the document.
            NodeList booklist = docu.getElementsByTagName("book");
            for (int i = 0; i < booklist.getLength(); i++) {
                Node book_item = booklist.item(i);
                System.out.println("第" + (i + 1) + "本书");
                printAttributes(book_item);
                printChildElements(book_item);
                System.out.println("以上就是第" + (i + 1) + "本书");
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints each attribute of the given node as {@code name:value} on its own line. */
    private void printAttributes(Node book_item) {
        // The attributes of an element are exposed as a NamedNodeMap.
        NamedNodeMap node_att = book_item.getAttributes();
        for (int j = 0; j < node_att.getLength(); j++) {
            Node node = node_att.item(j);
            System.out.print(node.getNodeName() + ":" + node.getNodeValue() + " ");
            System.out.println();
        }
    }

    /** Prints each child element of the given node as {@code name:text}. */
    private void printChildElements(Node book_item) {
        NodeList book_child = book_item.getChildNodes();
        for (int k = 0; k < book_child.getLength(); k++) {
            Node book_child_ele = book_child.item(k);
            // Whitespace between tags is reported as text nodes; without this
            // check they would be printed as a lot of blank output.
            if (book_child_ele.getNodeType() == Node.ELEMENT_NODE) {
                // Alternative: book_child_ele.getFirstChild().getNodeValue()
                // reads only the first child node. getTextContent() collects
                // all of the text inside the element, e.g. inside <name></name>.
                System.out.println(book_child_ele.getNodeName() + ":" + book_child_ele.getTextContent());
            }
        }
    }

    /**
     * Builds a {@code bookstore} document holding a single {@code book} with
     * one {@code name} child and writes it, indented, to {@code book1.xml}.
     * Failures are reported with a stack trace.
     */
    public void creat_xml() {
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.newDocument();
            // Author's note: with standalone set to true the standalone
            // attribute is no longer shown in the generated XML declaration.
            document.setXmlStandalone(true);

            Element bookstore = document.createElement("bookstore");
            Element book = document.createElement("book");
            Element name = document.createElement("name");
            name.setTextContent("安徒生童话");
            book.setAttribute("id", "1");
            book.setAttribute("size", "lower");
            book.appendChild(name);
            bookstore.appendChild(book);
            document.appendChild(bookstore);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer transformer = tf.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.transform(new DOMSource(document), new StreamResult(new File("book1.xml")));
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }
}
上面有两个方法,一个是解析,一个是生成,然后在主函数中调用。其实在解析时的流程是:
DocumentBuilderFactory -> DocumentBuilder -> Document(这里利用DocumentBuilder的parse()方法,将xml文件的路径传入;这里将xml文件放在项目下,所以用的是相对路径);然后利用getElementsByTagName("book")获取所有的book节点,并存在NodeList中;随后先读取属性,再读取子节点。在解析子节点时就要用到上面那张表了,同时在获取节点值时,推荐使用getTextContent()。
后面在构建xml文件时没有构建那么多,只有一个book节点。我自己感觉关键就是熟知要调用哪些类和方法,然后多加练习。下面是运行的结果截图
=======================================================================================
sax解析。这个要比较麻烦一些,
package project_xml_sax;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that prints the content of the parsed document and collects
 * every {@code book} element into {@link #book_list} as a {@code Book}.
 */
public class SAXHandle extends DefaultHandler {
    /** Number of {@code book} start tags seen so far. */
    private int book_index = 0;
    /** Book currently being populated; {@code null} outside a {@code book} element. */
    public Book book;
    /** Text gathered so far since the most recent start or end tag. */
    String value;
    /** Books completed so far, in document order. */
    ArrayList<Book> book_list = new ArrayList<Book>();
    /**
     * Accumulates character data, because the parser is allowed to deliver
     * the text of one element through several characters() calls.
     */
    private final StringBuilder text = new StringBuilder();

    /**
     * Called for every start tag. A {@code book} tag starts a new
     * {@code Book} and copies its {@code id} and {@code size} attributes;
     * any other tag except {@code bookstore} has its name printed.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        // Drop text that belonged to whatever came before this tag, so an
        // empty element yields "" instead of stale whitespace.
        text.setLength(0);
        value = "";
        if (qName.equals("book")) {
            book = new Book();
            book_index++;
            for (int i = 0; i < attributes.getLength(); i++) {
                String attrName = attributes.getQName(i);
                System.out.println(attrName + ":" + attributes.getValue(i));
                if (attrName.equals("id")) {
                    book.setId(attributes.getValue("id"));
                } else if (attrName.equals("size")) {
                    book.setSize(attributes.getValue("size"));
                }
            }
        } else if (!qName.equals("bookstore")) {
            System.out.print(qName + ":");
        }
    }

    /**
     * Called for every end tag. The end of a {@code book} stores the current
     * book in {@link #book_list}; the end of {@code name}, {@code language},
     * {@code year} or {@code price} copies the gathered text into the
     * current book.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        if (qName.equals("book")) {
            book_list.add(book);
            book = null;
            System.out.println("---------" + (book_index) + "------结束------");
        } else if (book != null) {
            // Guarded: these tags could also appear outside a <book>, where
            // there is no current book to fill in.
            if (qName.equals("name")) {
                book.setName(value);
            } else if (qName.equals("language")) {
                book.setLanguage(value);
            } else if (qName.equals("year")) {
                book.setYear(value);
            } else if (qName.equals("price")) {
                book.setPrice(value);
            }
        }
        // Text after this end tag belongs to the parent, not to this element.
        text.setLength(0);
    }

    /** Prints {@code start} when parsing begins. */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        System.out.println("start");
    }

    /** Prints {@code end} when parsing finishes. */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        System.out.println("end");
    }

    /**
     * Receives a chunk of character data, prints it unless it is blank, and
     * appends it to the text gathered for the current element.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        String chunk = new String(ch, start, length);
        text.append(chunk);
        value = text.toString();
        if (!chunk.trim().equals("")) {
            System.out.println(chunk);
        }
    }
}
package project_xml_sax;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;
import javax.swing.TransferHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
 * SAX examples: parse {@code book.xml} into {@code Book} objects, round-trip
 * them through Java serialization, and generate an XML file with a
 * {@code TransformerHandler}.
 */
public class SAXTest {
    /**
     * File the books are serialized to and read back from.
     * NOTE(review): the backslash is a path separator only on Windows, and
     * the demo directory presumably has to exist already - confirm.
     */
    private static final String BOOK_DATA_FILE = "demo\\book.dat";

    /**
     * Parses {@code book.xml} with the given handler, serializes the
     * collected books, then reads them back and prints each one.
     *
     * @param handler handler that receives the SAX events and collects the books
     */
    public void sax_xml(SAXHandle handler) {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        FileInputStream raw = null;
        ObjectInputStream ois = null;
        try {
            SAXParser parser = factory.newSAXParser();
            parser.parse("book.xml", handler);
            serialize(handler.book_list);
            // Open the data file only after serialize() has written it;
            // opening it earlier fails when the file does not exist yet and
            // otherwise reads a file that is about to be overwritten.
            raw = new FileInputStream(BOOK_DATA_FILE);
            ois = new ObjectInputStream(raw);
            while (true) {
                Book books = (Book) ois.readObject();
                // serialize() ends the stream with a null marker.
                if (books == null) {
                    break;
                }
                System.out.println(books);
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            close(ois != null ? ois : raw);
        }
    }

    /**
     * Writes every book of the list to the data file, followed by a
     * {@code null} marker that tells the reader where the data ends.
     *
     * @param book_list books to serialize
     */
    public static void serialize(ArrayList<Book> book_list) {
        FileOutputStream raw = null;
        ObjectOutputStream ops = null;
        try {
            raw = new FileOutputStream(BOOK_DATA_FILE);
            ops = new ObjectOutputStream(raw);
            for (Book books : book_list) {
                ops.writeObject(books);
            }
            ops.writeObject(null);
            ops.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(ops != null ? ops : raw);
        }
    }

    /**
     * Writes the books to {@code book_sax.xml} as a {@code bookstore}
     * document. Each book becomes a {@code book} element with {@code id} and
     * {@code size} attributes and {@code name}, {@code price}, {@code year}
     * and {@code language} children; null or empty values are left out.
     *
     * @param booklist books to write
     */
    public void creat_xml_sax(ArrayList<Book> booklist) {
        try {
            SAXTransformerFactory saxtransformer = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
            TransformerHandler handler = saxtransformer.newTransformerHandler();
            Transformer transfor = handler.getTransformer();
            transfor.setOutputProperty(OutputKeys.INDENT, "yes");
            transfor.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            Result result = new StreamResult(new File("book_sax.xml"));
            handler.setResult(result);

            AttributesImpl atts = new AttributesImpl();
            handler.startDocument();
            handler.startElement("", "", "bookstore", atts);
            for (Book book : booklist) {
                atts.clear();
                // Skip attributes the book does not have instead of handing
                // a null value to the serializer.
                if (book.getId() != null) {
                    atts.addAttribute("", "", "id", "", book.getId());
                }
                if (book.getSize() != null) {
                    atts.addAttribute("", "", "size", "", book.getSize());
                }
                handler.startElement("", "", "book", atts);
                writeTextElement(handler, "name", book.getName());
                writeTextElement(handler, "price", book.getPrice());
                writeTextElement(handler, "year", book.getYear());
                writeTextElement(handler, "language", book.getLanguage());
                handler.endElement("", "", "book");
            }
            handler.endElement("", "", "bookstore");
            handler.endDocument();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /** Emits an attribute-less element with the given text; does nothing for null or empty text. */
    private static void writeTextElement(TransformerHandler handler, String name, String text) throws SAXException {
        // Null is tested first so that a missing value cannot cause a
        // NullPointerException in the emptiness check.
        if (text == null || text.equals("")) {
            return;
        }
        char[] chars = text.toCharArray();
        handler.startElement("", "", name, new AttributesImpl());
        handler.characters(chars, 0, chars.length);
        handler.endElement("", "", name);
    }

    /** Closes the stream if it was opened, reporting a failure with a stack trace. */
    private static void close(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the parse and serialization example.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SAXTest sax = new SAXTest();
        SAXHandle handler = new SAXHandle();
        sax.sax_xml(handler);
        // Uncomment to also generate book_sax.xml from the parsed books.
        //sax.creat_xml_sax(handler.book_list);
    }
}
startDocument()和endDocument()是用来表示解析开始和解析结束的。
获取一个SAXParserFactory的实例,可以通过newInstance()方法
通过该实例的newSAXParser()方法来获取SAXParser实例
SAXParser实例通过parse()方法解析xml文件,解析时需要传入一个handler
创建一个类继承DefaultHandler,重写其中的一些方法进行业务处理,并创建这个类的实例handler
继承DefaultHandler时,一般重写的方法有startElement()和endElement(),还有startDocument()和endDocument(),以及characters()等一系列的方法
***上面有一个序列化的,是我自己硬加上去,在读取xml文件后,变成一个Book类的对象,(Book类就不贴代码了,很简单,各种属性,然后set和get方法)最后将这些对象序列化,这完全是没什么意义的,只是学习了对象序列化,看看能不能写出来。
****在后面还有一个是如何利用SAX生成xml文件。
后续两种稍等。。。。
JDOM解析
创建一个SAXBuilder的对象,接着创建一个输入流,将xml文件加载到输入流中
最后通过SAXBuilder的build()方法,将输入流加载到SAXBuilder中,并返回一个Document对象
通过Document对象获取xml文件的根节点(利用getRootElement()方法)
获取根节点下的子节点的集合(利用getChildren()方法)
/**
 * JDOM examples: {@link #JDOM()} reads {@code book.xml} into {@code Book}
 * objects, and {@link #creatJDOM_xml()} writes a small RSS-style document to
 * {@code bookjdom.xml}.
 */
public class jdom_xml {
    /**
     * Books collected by {@link #JDOM()}. Book is a simple user-written
     * class consisting of fields with getters and setters.
     */
    public static List<Book> books_list = new ArrayList<Book>();

    /**
     * Runs the document generation example.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new jdom_xml().creatJDOM_xml();
    }

    /**
     * Reads {@code book.xml}, prints the attributes and child elements of
     * every child of the root element, and appends one {@code Book} per
     * child to {@link #books_list}. Failures are reported with a stack trace.
     */
    public void JDOM() {
        SAXBuilder builder = new SAXBuilder();
        FileInputStream in = null;
        try {
            in = new FileInputStream("book.xml");
            Document document = builder.build(in);
            Element book_root = document.getRootElement();
            // Every child of the root element is treated as one book.
            List<Element> book_list = book_root.getChildren();
            int book_number = 0;
            for (Element book : book_list) {
                // A running counter replaces a list search on every iteration.
                book_number++;
                System.out.println("==========第" + book_number + "本书===========");
                Book books = new Book();
                List<Attribute> book_attr = book.getAttributes();
                for (Attribute attr : book_attr) {
                    System.out.println(attr.getName() + ":" + attr.getValue());
                    if (attr.getName().equals("id")) {
                        books.setId(attr.getValue());
                    } else if (attr.getName().equals("size")) {
                        books.setSize(attr.getValue());
                    }
                }
                List<Element> book_children = book.getChildren();
                for (Element element : book_children) {
                    System.out.println(element.getName() + ":" + element.getValue());
                    if (element.getName().equals("year")) {
                        books.setYear(element.getValue());
                    } else if (element.getName().equals("name")) {
                        books.setName(element.getValue());
                    } else if (element.getName().equals("price")) {
                        books.setPrice(element.getValue());
                    } else if (element.getName().equals("language")) {
                        books.setLanguage(element.getValue());
                    }
                }
                books_list.add(books);
                System.out.println(books_list.size());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The input stream was never closed before; release it on every path.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds an {@code rss} document (version 2.0) with one {@code channel}
     * element holding a CDATA section and writes it to {@code bookjdom.xml}.
     * Failures are reported with a stack trace.
     */
    public void creatJDOM_xml() {
        Element rss = new Element("rss");
        rss.setAttribute("version", "2.0");
        Element channel = new Element("channel");
        // The text contains angle brackets, so it is added as CDATA content.
        CDATA cdata = new CDATA("<我们是好学生>");
        channel.addContent(cdata);
        //channel.setText("askhdfskafh");
        rss.addContent(channel);
        Document document = new Document(rss);

        Format format = Format.getCompactFormat();
        format.setIndent("");
        XMLOutputter output = new XMLOutputter(format);
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(new File("bookjdom.xml"));
            output.output(document, out);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The output stream was never closed before; release it on every path.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
DOM4J解析
创建SAXReader的对象reader,通过reader对象的read()方法加载book.xml文件,并获取Document对象
通过Document对象获取根节点bookstore,通过Element对象的elementIterator()方法获取迭代器
遍历迭代器,获取根节点下各子节点的信息;要获取更深层的节点时,对子节点再调用一次elementIterator()迭代即可
/**
 * DOM4J examples: {@link #DOM4J_xml()} prints the content of
 * {@code book.xml}, and {@link #creat_dom4j()} writes a small RSS-style
 * document to {@code rss.xml}.
 */
public class dom4j_xml {

    /**
     * Runs the document generation example.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        dom4j_xml a = new dom4j_xml();
        a.creat_dom4j();
    }

    /**
     * Reads {@code book.xml} and prints, for every child element of the
     * root, its attributes followed by the name and string value of each of
     * its own child elements. Failures are reported with a stack trace.
     */
    public void DOM4J_xml() {
        int book_index = 0;
        try {
            SAXReader reader = new SAXReader();
            Document document = reader.read(new File("book.xml"));
            Element book_root = document.getRootElement();
            // Wildcard instead of a raw Iterator; the items are cast below.
            Iterator<?> iterator = book_root.elementIterator();
            while (iterator.hasNext()) {
                book_index++;
                Element book = (Element) iterator.next();
                List<Attribute> book_attr = book.attributes();
                for (Attribute attribute : book_attr) {
                    System.out.println(attribute.getName() + ":" + attribute.getValue());
                }
                // Iterate once more to reach the children of this book.
                Iterator<?> book_node_iter = book.elementIterator();
                while (book_node_iter.hasNext()) {
                    Element book_node = (Element) book_node_iter.next();
                    System.out.println(book_node.getName() + ":" + book_node.getStringValue());
                }
                System.out.println("============第" + (book_index) + "本结束==========");
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds an {@code rss} document (version 2.0) containing
     * {@code channel/title} and writes it pretty-printed to {@code rss.xml}.
     * Failures are reported with a stack trace.
     */
    public void creat_dom4j() {
        Document document = DocumentHelper.createDocument();
        Element element_rss = document.addElement("rss");
        element_rss.addAttribute("version", "2.0");
        Element element_channel = element_rss.addElement("channel");
        Element element_title = element_channel.addElement("title");
        element_title.setText("我们是爱好啥打法和");
        OutputFormat format = OutputFormat.createPrettyPrint();
        //format.setEncoding("");
        FileOutputStream out = null;
        XMLWriter writer = null;
        try {
            out = new FileOutputStream(new File("rss.xml"));
            writer = new XMLWriter(out, format);
            // Controls whether text is escaped on output; escaping is turned off here.
            writer.setEscapeText(false);
            writer.write(document);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path; fall back to the raw stream when the
            // writer itself could not be created.
            try {
                if (writer != null) {
                    writer.close();
                } else if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
上面就是四种方法,还望大神们多多指正,可以多多提供一些方向