解析XML文档_1 编写程序,解析score.xml文件-CSDN博客

本文链接：https://blog.csdn.net/Cxiaokai/article/details/7489727

在这里对于xml文档用java语言的解析，我想说三点。

1.读取xml文档。在读取xml文档的时候，我们要将dom4j.jar包导入到eclipse中。然后用dom4j.jar包中的类解析xml文档。解析过程如下：

import java.io.File;
import java.util.Iterator;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Reads a book catalogue with dom4j and prints, for every book entry, its
 * attributes, the text of its child elements and the attributes of its
 * author element to standard output.
 */
public class ReadXml {
	/**
	 * Parses the catalogue file and lists every book found under its root.
	 *
	 * @param args unused
	 * @throws DocumentException if the file cannot be read or parsed
	 */
	public static void main(String[] args)throws DocumentException{
		// Forward slashes are accepted by java.io.File on Windows as well,
		// so the relative path is usable on every platform.
		String fileName = "src/parser/book.xml";
		ReadXml test = new ReadXml();
		Document doc = test.read(fileName); // load the XML file into a DOM tree
		Element root = test.getRootElement(doc);
		test.list(root); // walk the tree starting at the root
	}
	
	/**
	 * Parses the XML file at the given path.
	 *
	 * NOTE(review): the reader is used with its default configuration; if this
	 * is ever pointed at untrusted XML, harden the parser against external
	 * entities first.
	 *
	 * @param fileName path of the XML file
	 * @return the parsed document
	 * @throws DocumentException if the file cannot be read or parsed
	 */
	public Document read(String fileName)throws DocumentException{
		SAXReader reader = new SAXReader();
		return reader.read(new File(fileName));
	}
	
	/**
	 * Returns the root element of the given document.
	 *
	 * @param doc a parsed document
	 * @return the document's root element
	 */
	public Element getRootElement(Document doc){
		return doc.getRootElement();
	}

	/**
	 * Prints every {@code 计算机书籍} child of {@code root}: first the book's
	 * attributes, then the text of its title, author and price children, then
	 * the author element's attributes and finally the description text.
	 * A missing child element is printed as {@code null}; a missing author
	 * element simply contributes no attribute lines.
	 *
	 * @param root the element whose book children are listed
	 */
	public void list(Element root){
		// Iterator<?> plus a cast compiles against both the raw-typed
		// dom4j 1.x API and the generified 2.x API.
		for(Iterator<?> books = root.elementIterator("计算机书籍"); books.hasNext();){
			Element book = (Element)books.next();
			printAttributes(book);

			String bookname = book.elementText("书名");
			String author = book.elementText("作者");
			String price = book.elementText("价格");
			
			System.out.println(bookname);
			System.out.println(author);
			System.out.println(price);
			
			/*
			 * At this point the values could be inserted into a database, e.g.:
			 *
			 *   String sql = "insert into book values(?,?,?)";
			 *   stmt = conn.prepareStatement(sql);
			 *   stmt.setString(1, bookname);
			 *   stmt.setString(2, author);
			 *   stmt.setString(3, price);
			 *   stmt.executeUpdate();
			 */

			// element() yields null when the child is absent; guard so that a
			// book without an author does not abort the whole listing.
			Element authorElement = book.element("作者");
			if(authorElement != null){
				printAttributes(authorElement);
			}
			System.out.println(book.elementText("简介"));
		}
	}

	/** Prints each attribute of {@code element} as "name value", one per line. */
	private void printAttributes(Element element){
		for(Iterator<?> it = element.attributeIterator(); it.hasNext();){
			Attribute attribute = (Attribute)it.next();
			System.out.println(attribute.getName()+" "+attribute.getValue());
		}
	}
}

读入的是如下的xml文档：

<?xml version="1.0" encoding="GB2312"?> 
 <书籍列表>
	<计算机书籍 hot="true" id="isbn1234">
		<书名>XML的今生今世</书名>         
		<价格>66.66</价格>
		<作者 电话="123456" 地址="beijing">bruce eckel</作者>
        <简介>该书详细描写XML的从出生到衰老的整个过程</简介>
  	</计算机书籍>
  	<计算机书籍 hot="false" id="isbn2345">
		<书名>21天精通XXX</书名>         
		<价格>56.00</价格>
		<作者 电话="1234" 地址="tianjin">****</作者>
        <简介>胡说八道</简介>
  	</计算机书籍>
 </书籍列表>

读取xml文档的步骤如下：

a.找到xml文档的位置，构造文件File对象。

b.用SAXReader类中的read方法，把File对象读入，返回的是一个Document对象。开始解析xml文档的第一步。

c.用Document对象调用getRootElement()方法，得到整个文档的根节点。xml文件必须有且只有一个根节点，否则无法解析。

d.得到的根节点的类型是Element类型的。利用这个根节点类型，就可以定位文档中的节点。并且用相应的方法，就可以得到文档中的一些信息。

2.建立xml文档。首先要指明生成这个xml文档的位置，然后调用addElement("字符串")，用指定的字符串生成指定的节点，并返回一个Element对象；对返回的Element对象再调用这个方法，生成的节点就是刚才那个节点的子节点。用这种方式可以生成xml文档。代码如下：

import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Random;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * Builds a small "Record" document with dom4j (a head section plus one
 * course list per course, each listing every student with a random score)
 * and writes it to a file as pretty-printed, GBK-encoded XML.
 */
public class BuildXml{
	/** Single source of random scores, reused for every student entry. */
	private final Random random = new Random();

	/**
	 * Generates the sample document from fixed student and course lists.
	 *
	 * @param args unused
	 */
	public static void main(String[] args){
		BuildXml xml = new BuildXml();
		String fileName = "src/student.xml"; // output file
		String[] studentName={"张三丰","花木兰","郭靖","任我行","赵敏","奥巴马"}; // students
		String[] courseName ={"射箭","骑马","烹饪","种花","打扑克"}; // courses
		xml.buildXml(fileName, studentName, courseName);
	}
	
	/**
	 * Builds the document and writes it to {@code fileName}.
	 * I/O failures are reported on standard error and otherwise ignored,
	 * so the caller is not interrupted.
	 *
	 * @param fileName    path of the file to create or overwrite
	 * @param studentName names of the students listed under every course
	 * @param courseName  names of the courses, one CourseList element each
	 */
	public void buildXml(String fileName, String[] studentName, String[] courseName){
		Document doc = DocumentHelper.createDocument();
		// The first element added to the document becomes its root.
		Element recordElement = doc.addElement("Record");
		// Head and Body are direct children of the root.
		Element headElement = recordElement.addElement("Head");
		Element bodyElement = recordElement.addElement("Body");
		Element codeE1 = headElement.addElement("Code");
		codeE1.setText("SD1101"); // text content of a leaf element
		Element examE1 = headElement.addElement("Exam");
		examE1.setText("是");
		addParamList(bodyElement, courseName, studentName);

		OutputFormat xmlFormat = OutputFormat.createPrettyPrint();
		xmlFormat.setEncoding("gbk"); // the document contains Chinese text

		// Write through a byte stream so that XMLWriter encodes the output
		// with the charset named in the format. A FileWriter would encode
		// with the platform default charset, which need not match the "gbk"
		// announced in the XML declaration.
		OutputStream out = null;
		try{
			out = new FileOutputStream(fileName);
			XMLWriter xmlWriter = new XMLWriter(out, xmlFormat);
			xmlWriter.write(doc);
			xmlWriter.flush(); // push buffered characters out before the stream is closed
		}catch(IOException e){
			e.printStackTrace();
		}finally{
			// Close the file even when writing failed part-way.
			if(out != null){
				try{
					out.close();
				}catch(IOException e){
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Adds one CourseList element per course to {@code bodyE1}; each holds the
	 * course name and the full student list.
	 */
	private void addParamList(Element bodyE1, String[] courseName, String[] studentName){
		for(int i=0; i<courseName.length; i++){
			Element courseList = bodyE1.addElement("CourseList");
			// "CouseCode" (sic) is the element name this program has always
			// emitted; it is kept unchanged so existing output stays the same.
			Element sheehE1 = courseList.addElement("CouseCode");
			sheehE1.setText(courseName[i]);
			addItem(studentName, courseList);
		}
	}

	/**
	 * Adds a Student element to {@code courseList} containing one StudentName
	 * child per student, each with a random "score" attribute in 0..99.
	 */
	private void addItem(String[] studentName, Element courseList){
		Element studentE1 = courseList.addElement("Student");
		for(int i=0; i<studentName.length; i++){
			Element studentNameE1 = studentE1.addElement("StudentName");
			studentNameE1.setText(studentName[i]);
			studentNameE1.addAttribute("score", String.valueOf(random.nextInt(100)));
		}
	}
}

生成的xml文档如下：

<?xml version="1.0" encoding="gbk"?>
<Record>
  <Head>
    <Code>SD1101</Code>
    <Exam>是</Exam>
  </Head>
  <Body>
    <CourseList>
      <CouseCode>射箭</CouseCode>
      <Student>
        <StudentName score="49">张三丰</StudentName>
        <StudentName score="22">花木兰</StudentName>
        <StudentName score="67">郭靖</StudentName>
        <StudentName score="5">任我行</StudentName>
        <StudentName score="1">赵敏</StudentName>
        <StudentName score="91">奥巴马</StudentName>
      </Student>
    </CourseList>
    <CourseList>
      <CouseCode>骑马</CouseCode>
      <Student>
        <StudentName score="29">张三丰</StudentName>
        <StudentName score="3">花木兰</StudentName>
        <StudentName score="35">郭靖</StudentName>
        <StudentName score="5">任我行</StudentName>
        <StudentName score="22">赵敏</StudentName>
        <StudentName score="4">奥巴马</StudentName>
      </Student>
    </CourseList>
    <CourseList>
      <CouseCode>烹饪</CouseCode>
      <Student>
        <StudentName score="27">张三丰</StudentName>
        <StudentName score="27">花木兰</StudentName>
        <StudentName score="41">郭靖</StudentName>
        <StudentName score="12">任我行</StudentName>
        <StudentName score="68">赵敏</StudentName>
        <StudentName score="42">奥巴马</StudentName>
      </Student>
    </CourseList>
    <CourseList>
      <CouseCode>种花</CouseCode>
      <Student>
        <StudentName score="71">张三丰</StudentName>
        <StudentName score="86">花木兰</StudentName>
        <StudentName score="44">郭靖</StudentName>
        <StudentName score="78">任我行</StudentName>
        <StudentName score="72">赵敏</StudentName>
        <StudentName score="97">奥巴马</StudentName>
      </Student>
    </CourseList>
    <CourseList>
      <CouseCode>打扑克</CouseCode>
      <Student>
        <StudentName score="79">张三丰</StudentName>
        <StudentName score="32">花木兰</StudentName>
        <StudentName score="78">郭靖</StudentName>
        <StudentName score="63">任我行</StudentName>
        <StudentName score="25">赵敏</StudentName>
        <StudentName score="58">奥巴马</StudentName>
      </Student>
    </CourseList>
  </Body>
</Record>

3.利用XPath定位节点。

import java.util.List;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Demonstrates locating nodes in a bookstore document with XPath
 * expressions through dom4j's selectSingleNode/selectNodes.
 */
public class TestXpath {
	/**
	 * Bookstore file read by both demos. A relative path is used instead of a
	 * machine-specific absolute one; presumably the program is started from
	 * the project root - verify against the run configuration.
	 */
	private static final String BOOKS_FILE = "src/parser/books.xml";

	/**
	 * Runs both demos.
	 *
	 * @param args unused
	 */
	public static void main(String[] args){
		new TestXpath().findBooks();
		new TestXpath().findBooksByTitle();
	}

	/**
	 * Evaluates a series of example predicates against the book children of
	 * the bookstore root and prints title, author, year, price and title
	 * language of every book in the "WEB" category. The other queries are
	 * kept as worked examples of the predicate syntax; their results are not
	 * printed. Any failure is reported on standard error.
	 */
	public void findBooks(){
		SAXReader reader = new SAXReader();
		try{
			Document doc = reader.read(BOOKS_FILE);
			Node root = doc.selectSingleNode("/bookstore");
			// Paths are relative to root, so "book" matches direct children only.
			List<?> byGuLong = root.selectNodes("book[author='古龙']");
			List<?> cheaperThan40 = root.selectNodes("book[price<40]");
			List<?> webBooks = root.selectNodes("book[@category='WEB']");
			// lang is an attribute of title, and title is a child of book.
			List<?> chineseTitles = root.selectNodes("book[title[@lang='zh']]");
			List<?> jinYongOver50 = root.selectNodes("book[author='金庸' and price>50]");
			List<?> chineseOver50 = root.selectNodes("book[title[@lang='zh'] and price>50]");
			// Books written by either Jin Yong or Gu Long.
			List<?> jinYongOrGuLong = root.selectNodes("book[author='金庸' or author='古龙']");
			List<?> byExactTitle = root.selectNodes("book[title='天龙八部']");

			for(Object o:webBooks){
				Element e = (Element)o; // each result is a book element
				System.out.println(e.elementText("title"));
				System.out.println(e.elementText("author"));
				System.out.println(e.elementText("year"));
				System.out.println(e.elementText("price"));
				// Guard against a book without a title instead of failing mid-list.
				Element title = e.element("title");
				String show = (title != null) ? title.attributeValue("lang") : null;
				System.out.println("lang="+show+"\n");
			}
		}catch(Exception e){
			e.printStackTrace();
		}
	}

	// Difference between book[title[...]] and book/title[@lang='zh']:
	// a predicate in brackets only filters, so book[...] still selects book
	// elements (whose children are then all reachable), whereas the path
	// book/title[...] selects the matching title elements themselves.

	/**
	 * Prints the text of every title element whose lang attribute is "zh".
	 * Any failure is reported on standard error.
	 */
	public void findBooksByTitle(){
		SAXReader reader  = new SAXReader();
		try{
			Document doc = reader.read(BOOKS_FILE);
			Node root = doc.selectSingleNode("/bookstore");
			List<?> titles = root.selectNodes("book/title[@lang='zh']");
			for(Object o:titles){
				Element e = (Element)o; // each result is a title element
				System.out.println(e.getStringValue());
			}
		}catch(Exception e){
			e.printStackTrace();
		}
	}
}

主要的步骤就是：

1.用SAXReader类定义的对象，调用read方法，读取指定的文件，生成Document对象

2.Document对象调用selectSingleNode()方法，定位根节点，得到Node对象。

3.利用根Node，可以定位根下的所有的子节点。

这个程序使用的文件内容如下：

<?xml version="1.0" encoding="GBK"?>

<bookstore>

	<book category="武侠">
		<title lang="zh">天龙八部</title>
		<author>金庸</author>
		<year>1965</year>
		<price>65.00</price>
	</book>
	
	<book category="武侠">
		<title lang="zh">天涯明月刀</title>
		<author>古龙</author>
		<year>1976</year>
		<price>40.00</price>
	</book>

	<book category="科幻">
		<title lang="en">Harry Potter</title>
		<author>J K. Rowling</author>
		<year>2005</year>
		<price>29.99</price>
	</book>

	<book category="WEB">
		<title lang="en">XQuery Kick Start</title>
		<author>James McGovern</author>
		<author>Per Bothner</author>
		<author>Kurt Cagle</author>
		<author>James Linn</author>
		<author>Vaidyanathan Nagarajan</author>
		<year>2003</year>
		<price>49.99</price>
	</book>

	<book category="WEB">
		<title lang="en">Learning XML</title>
		<author>Erik T. Ray</author>
		<year>2003</year>
		<price>39.95</price>
	</book>

</bookstore>

其实还是比较简单的一些基础知识，需要多练习。