使用 DOM 方式解析 Word 文档内容
/**
 * Loads the XML template {@code /temp/sjmb.xml} (resolved against
 * {@code ResourceManager.getRealPath()}), appends a {@code <w:p><w:r><w:t>} paragraph
 * to its first {@code wx:sect} element, and writes the result to
 * {@code <realPath>/temp/<sj.getSjmc()>.doc} via {@link #saveXml(String, Document)}.
 *
 * <p>Failures are reported on the standard error stream and are not propagated;
 * the path is returned regardless of whether the file was actually written.
 *
 * <p>NOTE(review): the returned path ends in {@code .xml} while the file is saved
 * with a {@code .doc} extension — confirm with callers which one is intended.
 * NOTE(review): {@code sj.getSjmc()} is placed in a file path without sanitizing;
 * verify it cannot contain path separators or {@code ..}.
 *
 * @param sj object whose {@code getSjmc()} value is used as the base file name
 * @return the string {@code "/temp/" + sj.getSjmc() + ".xml"}
 */
@Override
public String exportPath(ZjclSj sj) {
    String filepath = "/temp/" + sj.getSjmc() + ".xml";
    // Obtain the factory that creates DOM parsers.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    try {
        // Create the DOM parser and load the template from disk.
        DocumentBuilder builder = factory.newDocumentBuilder();
        String path = ResourceManager.getRealPath() + "/temp/sjmb.xml";
        Document document = builder.parse(new File(path));

        // NodeList.item(0) returns null when the template has no wx:sect element;
        // report that explicitly instead of failing later with a NullPointerException.
        Node sectNode = document.getElementsByTagName("wx:sect").item(0);
        if (sectNode == null) {
            System.err.println("exportPath: no <wx:sect> element found in template " + path);
            return filepath;
        }
        System.out.println("----------" + sectNode.getNodeName());

        // Build the following fragment and append it to the section:
        // <w:p>
        //   <w:r>
        //     <w:t>Hello, World.</w:t>
        //   </w:r>
        // </w:p>
        Element wp = document.createElement("w:p");
        Element wr = document.createElement("w:r");
        Element wt = document.createElement("w:t");
        wt.appendChild(document.createTextNode("Hello, World."));
        wr.appendChild(wt);
        wp.appendChild(wr);
        sectNode.appendChild(wp);

        String path2 = ResourceManager.getRealPath() + "/temp/" + sj.getSjmc() + ".doc";
        System.out.println("path2:" + path2);
        saveXml(path2, document);
    } catch (Exception e) {
        // Best-effort export: the failure is logged and the path is still returned.
        e.printStackTrace();
    }
    return filepath;
}
/**
 * Serializes a DOM document to a file, with the transformer's {@code indent}
 * output property set to {@code yes}.
 *
 * <p>Transformer and file-opening errors are printed to the standard error
 * stream and not propagated. The output stream is always closed, so the file
 * handle is released and buffered data is flushed even when the transform fails.
 *
 * @param fileName path of the file to write (created or overwritten)
 * @param doc      document to serialize
 */
public static void saveXml(String fileName, Document doc) {
    TransformerFactory transFactory = TransformerFactory.newInstance();
    FileOutputStream out = null;
    try {
        Transformer transformer = transFactory.newTransformer();
        transformer.setOutputProperty("indent", "yes");
        // Open the file only after the transformer has been created successfully.
        out = new FileOutputStream(fileName);
        transformer.transform(new DOMSource(doc), new StreamResult(out));
    } catch (TransformerConfigurationException e) {
        e.printStackTrace();
    } catch (TransformerException e) {
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // The transformer does not close a caller-supplied stream; do it here.
        if (out != null) {
            try {
                out.close();
            } catch (java.io.IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Recursively prints the attributes and child nodes of an element to standard output.
 *
 * <p>Non-element nodes are ignored. For an element, every attribute is printed
 * first, then each child node is printed and visited in turn.
 *
 * @param node the node to start from
 */
public void listNodes(Node node) {
    // Only element nodes carry attributes and children worth listing.
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return;
    }
    Element current = (Element) node;

    // Print every attribute of this element, if it has any.
    if (current.hasAttributes()) {
        NamedNodeMap attributes = current.getAttributes();
        int attrIndex = 0;
        while (attrIndex < attributes.getLength()) {
            Attr attribute = (Attr) attributes.item(attrIndex);
            String attrLine = "attr:" + attribute.getNodeName()
                    + " value:" + attribute.getNodeValue()
                    + " type:" + attribute.getNodeType();
            System.out.println(attrLine);
            attrIndex++;
        }
    }

    // Print each child, then descend into it.
    NodeList children = current.getChildNodes();
    int childIndex = 0;
    while (childIndex < children.getLength()) {
        Node child = children.item(childIndex);
        String childLine = "nd:" + child.getNodeName()
                + " value:" + child.getNodeValue()
                + " type:" + child.getNodeType();
        System.out.println(childLine);
        listNodes(child);
        childIndex++;
    }
}
--摘自 试卷考试系统、手动组卷导出