最近在调试科大讯飞语音的时候,里面用到了xml解析,简单探究,记录一下成长历程。
Java中操作DOM中常用的类
- Node:数据类型基类
- Element:最常用的类
- Attr:Element的属性
- Text:Element 或 Attr 的内容
- Document:代表整个XML文档,代表DOM tree
使用DOM的步骤
1.导入相关类
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.*;
Create a DocumentBuilder
2.创建DocumentBuilder对象
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
3.从文件或流中创建Document对象
StringBuilder xmlStringBuilder = new StringBuilder();
xmlStringBuilder.append("<?xml version=\"1.0\"?> <class> </class>");
ByteArrayInputStream input = new ByteArrayInputStream(
xmlStringBuilder.toString().getBytes("UTF-8"));
Document doc = builder.parse(input);
4.提取根元素
Element root = doc.getDocumentElement();
5 处理属性&&子节点
//returns specific attribute
element.getAttribute("attributeName");
//returns a Map (table) of names/values
element.getAttributes();
//Examine sub-elements
//returns a list of subelements of specified name
element.getElementsByTagName("subelementName");
//returns a list of all child nodes
element.getChildNodes();
XML文件解析实例一
放到项目根目录下的,需要解析的input.txt文件,其内容如下:
<?xml version="1.0"?>
<class>
<student rollno="393">
<firstname>dinkar</firstname>
<marks>85</marks>
</student>
<student rollno="493">
<firstname>Vaneet</firstname>
<marks>95</marks>
</student>
<student rollno="593">
<firstname>jasvir</firstname>
<marks>90</marks>
</student>
</class>
解析代码如下:
package com.example.xmlparse;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
/**
 * Parses the relative file {@code input.txt} with the DOM API and prints the
 * root element name followed by the roll number, first name and marks of
 * every {@code student} element found in the document.
 */
public class FirstDOMParseDemo {
    /**
     * Runs the demo. Any failure while building the parser or reading/parsing
     * the file is reported through {@code printStackTrace()} rather than
     * propagated.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File inputFile = new File("input.txt");
            // 1. create DocumentBuilder object
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            // 2. create Document object
            Document doc = dBuilder.parse(inputFile);
            doc.getDocumentElement().normalize();
            // get root node
            System.out.println("Root element :"
                    + doc.getDocumentElement().getNodeName());
            // 3. get student list
            NodeList nList = doc.getElementsByTagName("student");
            System.out.println("----------------------------");
            for (int temp = 0; temp < nList.getLength(); temp++) {
                Node nNode = nList.item(temp);
                System.out.println("\nCurrent Element :"
                        + nNode.getNodeName());
                if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                    Element eElement = (Element) nNode;
                    // get attribute
                    System.out.println("Student roll no : "
                            + eElement.getAttribute("rollno"));
                    // get element content; a missing sub-element prints as empty
                    // text instead of aborting the remaining students with an NPE
                    System.out.println("First Name : "
                            + firstDescendantText(eElement, "firstname"));
                    System.out.println("Marks : "
                            + firstDescendantText(eElement, "marks"));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the text content of the first descendant of {@code parent} with
     * the given tag name.
     *
     * @param parent  element whose descendants are searched
     * @param tagName tag name to look for
     * @return the text content of the first match, or an empty string when no
     *         such descendant exists (the same convention
     *         {@code Element.getAttribute} uses for a missing attribute)
     */
    private static String firstDescendantText(Element parent, String tagName) {
        Node match = parent.getElementsByTagName(tagName).item(0);
        return match == null ? "" : match.getTextContent();
    }
}
结果如下:
Root element :class
----------------------------
Current Element :student
Student roll no : 393
First Name : dinkar
Marks : 85
Current Element :student
Student roll no : 493
First Name : Vaneet
Marks : 95
Current Element :student
Student roll no : 593
First Name : jasvir
Marks : 90
XML解析实例二
这里将要解析的input2.txt文件放在项目的根目录下,其内容如下:
<?xml version="1.0"?>
<cars>
<supercars company="Ferrari">
<carname type="formula one">Ferarri 101</carname>
<carname type="sports car">Ferarri 201</carname>
</supercars>
<supercars company="Lamborgini">
<carname>Lamborgini 001</carname>
</supercars>
</cars>
package com.example.xmlparse;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
/**
 * Parses the relative file {@code input2.txt} with the DOM API and prints,
 * for each {@code supercars} element, its {@code company} attribute and the
 * text and {@code type} attribute of every {@code carname} element below it.
 */
public class SecondDOMParserDemo {
    /**
     * Runs the demo; any exception raised while parsing or printing is
     * reported with {@code printStackTrace()}.
     *
     * @param argv unused
     */
    public static void main(String argv[]) {
        try {
            DocumentBuilder parser =
                    DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document tree = parser.parse(new File("input2.txt"));
            Element rootElement = tree.getDocumentElement();
            rootElement.normalize();
            System.out.println("Root element: " + rootElement.getNodeName());

            NodeList groups = tree.getElementsByTagName("supercars");
            System.out.println("----------------------------");
            for (int g = 0; g < groups.getLength(); g++) {
                Node group = groups.item(g);
                System.out.print("\nCurrent Element :");
                System.out.println(group.getNodeName());
                if (group.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                printGroup((Element) group);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the company of one {@code supercars} element followed by the
     * name and type of each {@code carname} element found beneath it.
     */
    private static void printGroup(Element group) {
        System.out.println("company : " + group.getAttribute("company"));
        NodeList cars = group.getElementsByTagName("carname");
        for (int c = 0; c < cars.getLength(); c++) {
            Node candidate = cars.item(c);
            if (candidate.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element car = (Element) candidate;
            System.out.println("car name : " + car.getTextContent());
            System.out.println("car type : " + car.getAttribute("type"));
        }
    }
}
解析结果如下:
Root element: cars
----------------------------
Current Element :supercars
company : Ferrari
car name : Ferarri 101
car type : formula one
car name : Ferarri 201
car type : sports car
Current Element :supercars
company : Lamborgini
car name : Lamborgini 001
car type :
创建XML并写入文件实例
源代码如下:
package com.example.xmlparse;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
/**
 * Builds a small {@code cars} document in memory with the DOM API, then
 * serializes it first to the relative file {@code output.txt} and afterwards
 * to standard output.
 */
public class FirstCreateXMLDemo {
    /**
     * Runs the demo; any exception raised while building or writing the
     * document is reported with {@code printStackTrace()}.
     *
     * @param argv unused
     */
    public static void main(String argv[]) {
        try {
            Document xml = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .newDocument();

            // <cars> is the document root
            Element cars = xml.createElement("cars");
            xml.appendChild(cars);

            // <supercars company="Ferrari">
            Element ferrari = xml.createElement("supercars");
            cars.appendChild(ferrari);
            attach(xml, ferrari, "company", "Ferrari");

            // first <carname>, carrying a type attribute
            Element typedCar = xml.createElement("carname");
            attach(xml, typedCar, "type", "formula one");
            typedCar.appendChild(xml.createTextNode("Ferrari 101"));
            ferrari.appendChild(typedCar);

            // second <carname>, text only
            Element plainCar = xml.createElement("carname");
            plainCar.appendChild(xml.createTextNode("Ferrari 202"));
            ferrari.appendChild(plainCar);

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            DOMSource tree = new DOMSource(xml);

            // drop any earlier output file before writing the new one
            File previous = new File("output.txt");
            if (previous.exists()) {
                previous.delete();
            }
            serializer.transform(tree, new StreamResult(new File("output.txt")));

            // echo the same document to the console for inspection
            serializer.transform(tree, new StreamResult(System.out));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Creates an attribute node with the given name and value and sets it on {@code target}. */
    private static void attach(Document owner, Element target, String name, String value) {
        Attr attribute = owner.createAttribute(name);
        attribute.setValue(value);
        target.setAttributeNode(attribute);
    }
}
创建的位于项目根目录下的output.txt的内容如下(没有进行格式化):
<?xml version="1.0" encoding="UTF-8" standalone="no"?><cars>
<supercars company="Ferrari">
<carname type="formula one">Ferrari 101</carname>
<carname>Ferrari 202</carname>
</supercars>
</cars>
修改XML文件实例
这里使用的位于项目根目录下的output2.txt的文件内容如下:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<cars>
<supercars company="Ferrari">
<carname type="formula one">Ferrari 101</carname>
<carname type="sports">Ferrari 202</carname>
</supercars>
<luxurycars company="Benteley">
<carname>Benteley 1</carname>
<carname>Benteley 2</carname>
<carname>Benteley 3</carname>
</luxurycars>
</cars>
修改代码如下:
package com.example.xmlparse;
import org.w3c.dom.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
/**
 * Loads the relative file {@code output2.txt}, rewrites the first
 * {@code supercars} element (company attribute and the two known car names),
 * removes every {@code luxurycars} child of the root element, and prints the
 * modified document to standard output. The input file itself is not
 * rewritten.
 */
public class ModifyXmlFileDemo {
    /**
     * Runs the demo; any exception raised while parsing, modifying or
     * serializing is reported with {@code printStackTrace()}.
     *
     * @param argv unused
     */
    public static void main(String argv[]) {
        try {
            File inputFile = new File("output2.txt");
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document doc = docBuilder.parse(inputFile);

            // Ask for the document element explicitly: the document's first
            // child can be a comment or processing instruction instead.
            Node cars = doc.getDocumentElement();

            Node supercar = doc.getElementsByTagName("supercars").item(0);
            if (supercar != null) {
                rebrand(supercar);
            }

            removeChildrenNamed(cars, "luxurycars");

            // write the content on console
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            DOMSource source = new DOMSource(doc);
            System.out.println("-----------Modified File-----------");
            StreamResult consoleResult = new StreamResult(System.out);
            transformer.transform(source, consoleResult);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Updates the {@code company} attribute of {@code supercar}, when present,
     * and renames its direct {@code carname} children whose text is exactly
     * "Ferrari 101" or "Ferrari 202".
     */
    private static void rebrand(Node supercar) {
        // update supercar attribute
        NamedNodeMap attributes = supercar.getAttributes();
        Node company = attributes == null ? null : attributes.getNamedItem("company");
        if (company != null) {
            company.setTextContent("Lamborigini");
        }

        // loop the supercar child nodes; only text is replaced, so the live
        // child list keeps its length while we walk it
        NodeList children = supercar.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE
                    || !"carname".equals(child.getNodeName())) {
                continue;
            }
            String text = child.getTextContent();
            if ("Ferrari 101".equals(text)) {
                child.setTextContent("Lamborigini 001");
            } else if ("Ferrari 202".equals(text)) {
                child.setTextContent("Lamborigini 002");
            }
        }
    }

    /**
     * Removes every direct child of {@code parent} whose node name equals
     * {@code name}.
     */
    private static void removeChildrenNamed(Node parent, String name) {
        NodeList children = parent.getChildNodes();
        // getChildNodes() is a live list: walking it backwards keeps the
        // indices of not-yet-visited nodes stable after each removal, so
        // adjacent matches are no longer skipped.
        for (int i = children.getLength() - 1; i >= 0; i--) {
            Node child = children.item(i);
            if (name.equals(child.getNodeName())) {
                parent.removeChild(child);
            }
        }
    }
}
结果如下:
-----------Modified File-----------
<?xml version="1.0" encoding="UTF-8" standalone="no"?><cars>
<supercars company="Lamborigini">
<carname type="formula one">Lamborigini 001</carname>
<carname type="sports">Lamborigini 002</carname>
</supercars>
</cars>