javax.swing css,使用javax.swing.text.html包解析HTML文档

最新推荐文章于 2023-10-25 15:33:40 发布

张涵涵

最新推荐文章于 2023-10-25 15:33:40 发布

阅读量355

点赞数

文章标签： javax.swing css

下面是一个例子：

import javax.swing.text.html.HTMLEditorKit$ParserCallback;

import javax.swing.text.html.*;

import javax.swing.text.*;

import com.pdcss.debug.*;

import java.util.*;

import javax.swing.text.html.parser.*;

import java.io.*;

import java.net.*;

import com.pdcss.xml.*;

import com.pdcss.debug.DebugOut;

public class HTMLToXML {

/**

* document对象

org.w3c.dom.Document newDoc;

/**

* 根结点对象

private org.w3c.dom.Element root;

/**

* 是否忽略标签

private boolean ignoreImg=true;

/**

* 设置是否忽略按钮标签

private boolean ignoreButton=true;

/**

* 文件路径

private String filePath;

/**

* 是否调用了函数parse()

private boolean isParse=false;

/**

* 处理结果可接受的html tagname,如果不在这个表里面则忽略掉这个元素

private ArrayList acceptedTags;

public HTMLToXML(String filePath) {

this.filePath=filePath;

newDoc = XmlParser.buildNewDocument();

root= newDoc.createElement("HTMLToXML");

newDoc.appendChild(root);

//增加可接受的元素的tag

acceptedTags=new ArrayList(5);

acceptedTags.add(HTML.Tag.INPUT);

acceptedTags.add(HTML.Tag.TEXTAREA);

acceptedTags.add(HTML.Tag.SELECT);

}

/**

* 开始解析

private void parse(){

HTMLEditorKit.Parser parser = new ParserDelegator();

HTMLEditorKit.ParserCallback callback = new MyParserCallback();

try {

URL u = new File(filePath).toURL();

InputStream in = u.openStream();

InputStreamReader reader = new InputStreamReader(in);

parser.parse(reader, callback, false);

reader.close();

}

catch (IOException e) {

}

isParse=true;

}

/**

* 返回创建好的Document对象

* @return

public org.w3c.dom.Document getXmlDocument(){

if(isParse==false){

parse();

isParse=true;

}

return this.newDoc;

}

/**

* 返回标准的xml字符串

* @return

public String getXmlStr(){

if(isParse==false){

parse();

isParse=true;

}

return XmlParser.elementToString(root);

}

/**

* 设置是否忽略img标签

* @param isIgnore

public void setIgnoreImg(boolean isIgnore){

ignoreImg=isIgnore;

if(ignoreImg==true){

acceptedTags.remove(HTML.Tag.IMG);

}else{

if(!acceptedTags.contains(HTML.Tag.IMG)){

acceptedTags.add(HTML.Tag.IMG);

}

public boolean getIgnoreImg(){

return ignoreImg;

}

/**

* 设置是否忽略button标签

* @param isIgnore

public void setIgnoreButton(boolean isIgnore){

ignoreButton=isIgnore;

}

private class MyParserCallback extends HTMLEditorKit.ParserCallback {

public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {

handleTag(tag, attributes, position);

}

public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {

handleTag(tag, attributes, position);

}

private void handleTag(HTML.Tag tag, MutableAttributeSet attributes, int position){

if(!acceptedTags.contains(tag)){

return;

}

if(ignoreButton==true){

if (tag == HTML.Tag.INPUT) {

String type = attributes.getAttribute(HTML.Attribute.TYPE).toString();

//不是按钮

if (type != null){

if (type.equalsIgnoreCase("button") || type.equalsIgnoreCase("submit") || type.equalsIgnoreCase("reset")) {

return;

}

org.w3c.dom.Element newElement = newDoc.createElement(tag.toString());

Enumeration enum = attributes.getAttributeNames();

Object attributeName;

while (enum.hasMoreElements()) {

attributeName = enum.nextElement();

newElement.setAttribute(attributeName.toString(), attributes.getAttribute(attributeName).toString());

}

root.appendChild(newElement);

}

张涵涵

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫