先看看xml文件
<?xml version="1.0" encoding="UTF-8"?>
<configuration config="user.xml">
<property weburl="www.xzy.com">
<name>mapreduce.jobhistory.jhist.format</name>
<value>json</value>
<source>mapred-default.xml</source>
<source>job.xml</source>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>users</value>
<source>core-site.xml</source>
</property>
</configuration>
对上诉的XMl分析结构,如果打印根节点root node的子节点,有5个,分别是<root text node>,<first element node>, <first element text node>,<second element node>,<second element text node>;这个结构很重要,很大程度影响编程。
这个步骤大体是,创建工厂,生成解析器,得到xml文件,调用parse。coding 如下:
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
public class docParse {
public static void main(String args[]) throws Exception {
File hostlist = new File("src/main/resources/5.xml");//得到文件
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();//创建工厂
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(hostlist);//调用parse
doc.getDocumentElement().normalize();
Element root = doc.getDocumentElement();
System.out.println("root of xml file:" + root.getTagName());
NodeList childNodes = root.getChildNodes();
for (int i = 0;i < childNodes.getLength(); i++){
if (i == 1)//0是根节点的text node
System.out.println(childNodes.item(i).getAttributes().getNamedItem("weburl").getNodeValue());// 打印属性
System.out.print(childNodes.item(i).getNodeName()); // <root text> <first child> <first child text> <second child> <second child text>
Node node = childNodes.item(i);
System.out.print(" type:" + node.getNodeType());
if (node.getNodeType() == Node.ELEMENT_NODE){
NodeList firstNods = node.getChildNodes();
for(int j = 0; j<firstNods.getLength();j++){
System.out.print(">>child nods:" + firstNods.item(j).getNodeName());
if (firstNods.item(j).getNodeName().equals("name")){
System.out.print("--" + firstNods.item(j).getFirstChild().getNodeValue());
}
}
}
System.out.println();
}
}
}
上诉代码只是简单的测试,只做参考。
XML 文件的格式可参看 https://www.w3schools.com/xml/xml_tree.asp