/*
* Copyright 2002-2009 Andy Clark, Marc Guillemot
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sample;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStream;
import java.io.StringWriter;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.cyberneko.html.parsers.DOMParser;
import org.dom4j.io.DOMReader;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
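/**
* This program parses each HTML input named on the command line with the
* NekoHTML DOM parser, converts the resulting W3C DOM document to a dom4j
* document and writes it, pretty-printed, to a sibling file with the
* <code>.xhtml</code> extension. It also offers helpers for printing the
* class names of all the nodes in a document and for serializing a W3C
* document to an XML string.
*
* @author Andy Clark
*
* @version $Id: TestHTMLDOM.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
*/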
public class TestHTMLDOM {

    /** URI scheme prefix that is stripped from an argument before deriving the output path. */
    private static final String FILE_SCHEME = "file:";

    /** Encoding declared in the generated XML and used to encode the output bytes. */
    private static final String OUTPUT_ENCODING = "utf-8";

    /**
     * Converts an <code>org.w3c.dom.Document</code> into an
     * <code>org.dom4j.Document</code> using a dom4j <code>DOMReader</code>.
     *
     * @param doc
     *            the W3C DOM document to convert; may be <code>null</code>
     *
     * @throws Exception
     *             kept in the signature for backward compatibility with
     *             existing callers
     *
     * @return the dom4j document, or <code>null</code> when <code>doc</code>
     *         is <code>null</code>
     */
    public static org.dom4j.Document parse(org.w3c.dom.Document doc)
            throws Exception {
        if (doc == null) {
            return null;
        }
        DOMReader domReader = new DOMReader();
        return domReader.read(doc);
    }

    /**
     * Main. Parses every argument with NekoHTML and writes the result as a
     * pretty-printed <code>.xhtml</code> file next to the input.
     *
     * @param argv
     *            the inputs to convert, one per argument; each is handed to
     *            <code>DOMParser.parse(String)</code> unchanged
     *
     * @throws Exception
     *             if parsing, conversion or writing fails
     */
    public static void main(String[] argv) throws Exception {
        // 0. Create the parser; the same instance is reused for every input.
        DOMParser parser = new DOMParser();

        // The output format does not depend on the input, so build it once.
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding(OUTPUT_ENCODING); // avoids garbled non-ASCII (e.g. Chinese) text

        for (int i = 0; i < argv.length; i++) {
            // 1. Let NekoHTML parse the input.
            parser.parse(argv[i]);
            // 2. Fetch the parse result.
            Document doc = parser.getDocument();
            // 3. Convert the W3C document to a dom4j document and write it to
            //    a file with the same name as the input but an .xhtml suffix.
            File target = outputFileFor(argv[i]);

            // Hand XMLWriter a byte stream rather than a FileWriter: a
            // FileWriter encodes with the platform default charset, which can
            // contradict the encoding declared by the OutputFormat.
            OutputStream out = new FileOutputStream(target);
            try {
                XMLWriter writer = new XMLWriter(out, format);
                writer.write(parse(doc));
                writer.flush();
            } finally {
                // Always release the file handle, even when writing fails.
                out.close();
            }
        }
    }

    /**
     * Derives the output file for an input argument: a leading
     * <code>file:</code> scheme is removed when present, and the extension of
     * the last path segment (if any) is replaced by <code>.xhtml</code>.
     * Arguments without an extension simply get <code>.xhtml</code> appended.
     *
     * @param systemId
     *            the command-line argument identifying the input
     *
     * @return the file the converted document is written to
     */
    private static File outputFileFor(String systemId) {
        String path = systemId;
        if (path.regionMatches(true, 0, FILE_SCHEME, 0, FILE_SCHEME.length())) {
            path = path.substring(FILE_SCHEME.length());
        }
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        int lastDot = path.lastIndexOf('.');
        // A dot only marks an extension when it sits inside the last path
        // segment and is not that segment's first character.
        String base = lastDot > lastSeparator + 1 ? path.substring(0, lastDot) : path;
        return new File(base + ".xhtml");
    }

    /**
     * Prints the class name of a node and, recursively, of all its
     * descendants to standard output.
     *
     * @param node
     *            the node to start from
     * @param indent
     *            the prefix printed before the class name; one space is
     *            appended for each level of nesting
     */
    public static void print(Node node, String indent) {
        System.out.println(indent + node.getClass().getName());
        Node child = node.getFirstChild();
        while (child != null) {
            print(child, indent + " ");
            child = child.getNextSibling();
        }
    }

    /**
     * Serializes a W3C document to an indented XML string.
     *
     * @param doc
     *            the document to serialize
     * @return the XML text produced by the default JAXP transformer
     * @throws TransformerFactoryConfigurationError
     *             if no transformer factory can be instantiated
     * @throws TransformerException
     *             if the transformer cannot be created or the
     *             transformation fails
     */
    public static String toString(Document doc)
            throws TransformerFactoryConfigurationError, TransformerException {
        DOMSource source = new DOMSource(doc);
        StringWriter writer = new StringWriter();
        Result result = new StreamResult(writer);
        Transformer transformer = TransformerFactory.newInstance()
                .newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        // OutputKeys.CDATA_SECTION_ELEMENTS is deliberately not set: it takes
        // a whitespace-separated list of element names, not a yes/no flag.
        transformer.setOutputProperty(
                "{http://xml.apache.org/xslt}indent-amount", "2");
        transformer.transform(source, result);
        return writer.getBuffer().toString();
    }
}
// Source article title: "Converting HTML to generate a DOM".
// Page note: latest recommended article published on 2023-06-02 13:41:32.