最近有一个将RTF格式的文件转换成HTML格式的需求,网上搜索发现相关资料比较少,能找到的一些资料年代也比较久远。经过一番摸索和测试,终于成功地将RTF转成了HTML(主要以div标签为主),并且解决了烦人的中文乱码问题。但是很遗憾,目前RTF文件里面的表格和图片还无法转换(没有找到方案)。
1、首先,我们需要借助WebCAT里面的RTF2HTML这个类,WebCAT的下载地址为:http://webcat.sourceforge.net/ 或者 https://download.csdn.net/download/Rookie_cc/12657315 。(你也可以直接参考下面的代码,不用下载WebCAT)
2、具体代码如下:
RTF2HTML工具类:
package com.fish.fileparser.utils;
import java.awt.Color;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.StringTokenizer;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.Document;
import javax.swing.text.Element;
import javax.swing.text.StyleConstants;
import javax.swing.text.rtf.RTFEditorKit;
/**
* Utility class that converts RTF documents to HTML markup.
*
*@author bmartins
*@created 22 de Agosto de 2002
*/
public class RTF2HTML {
/**
* Marker that identifies a table in an RTF file.
*/
public static final String TABLE = "\\trowd";
/**
* Marker that identifies a picture in an RTF file.
*/
public static final String PICTURE = "\\*\\shppic";
/**
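* Tracks the current text formatting state (font, color, size, character
* styles, alignment and indents) and refreshes it for each element processed.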
*
*@author bmartins
*@created 22 de Agosto de 2002
*/
private class HTMLStateMachine {
// Alignment names "left", "center" and "right".
// NOTE(review): presumably indexed by the alignment field - confirm in updateAlignment.
private String alignNames[] = { "left", "center", "right" };
/**
* When true, updateState also refreshes the font name through
* updateFontName; when false the font name is left untouched.
*/
public boolean acceptFonts;
// Current font name; starts as the empty string.
private String fontName;
// Current font color; starts as null.
private Color color;
// Current font size; starts at -1.
private int size;
// Current alignment value; starts at -1.
private int alignment;
// Current character styles, refreshed by updateState with the tag names "b", "i" and "u".
private boolean bold;
private boolean italic;
private boolean underline;
// Current value read for StyleConstants.FirstLineIndent.
private double firstLineIndent;
// Indents in effect the last time an indent change was handled; updateState
// compares the current indents against these to detect a change.
private double oldLeftIndent;
private double oldRightIndent;
// Current values read for StyleConstants.LeftIndent and StyleConstants.RightIndent.
private double leftIndent;
private double rightIndent;
// Set to true by updateState when it receives an element named "paragraph" (case-insensitive).
private boolean firstLine;
/**
* Constructor for the HTMLStateMachine object
*/
HTMLStateMachine() {
    // Font-name tracking is enabled by default.
    acceptFonts = true;

    // No font attributes are known yet: empty name, no color,
    // and -1 for both size and alignment.
    fontName = "";
    color = null;
    size = alignment = -1;

    // Every boolean flag starts switched off.
    bold = italic = underline = false;
    firstLine = false;

    // Current and previously seen indents all start at zero.
    firstLineIndent = 0.0D;
    leftIndent = rightIndent = 0.0D;
    oldLeftIndent = oldRightIndent = 0.0D;
}
/**
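* Refreshes the tracked formatting state from the given attributes, handing
* the output buffer to the helper calls that process each attribute.
*
*@param attributeset attributes from which the indent, style, font and alignment values are read
*@param stringbuffer output buffer passed on to the helper calls
*@param element element being processed; one named "paragraph" (case-insensitive) sets the first-line flag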
*/
public void updateState(AttributeSet attributeset, StringBuffer stringbuffer, Element element) {
    // A paragraph element raises the first-line flag.
    if (element.getName().equalsIgnoreCase("paragraph")) {
        firstLine = true;
    }

    // Read the new indents first so that a change can be detected before
    // any of the other attributes are processed.
    leftIndent = updateDouble(attributeset, leftIndent, StyleConstants.LeftIndent);
    rightIndent = updateDouble(attributeset, rightIndent, StyleConstants.RightIndent);
    if (!(leftIndent == oldLeftIndent && rightIndent == oldRightIndent)) {
        // Indents changed: close using the values that were in effect before.
        closeIndentTable(stringbuffer, oldLeftIndent, oldRightIndent);
    }

    // Each helper receives the previous value plus the output buffer and
    // returns the value to keep; the call order is significant because the
    // helpers share the same buffer.
    bold = updateBoolean(attributeset, StyleConstants.Bold, "b", bold, stringbuffer);
    italic = updateBoolean(attributeset, StyleConstants.Italic, "i", italic, stringbuffer);
    underline = updateBoolean(attributeset, StyleConstants.Underline, "u", underline, stringbuffer);
    size = updateFontSize(attributeset, size, stringbuffer);
    color = updateFontColor(attributeset, color, stringbuffer);
    if (acceptFonts) {
        // Font names are only tracked when the caller allows it.
        fontName = updateFontName(attributeset, fontName, stringbuffer);
    }
    alignment = updateAlignment(attributeset, alignment, stringbuffer);
    firstLineIndent = updateDouble(attributeset, firstLineIndent, StyleConstants.FirstLineIndent);

    // Re-check the indents and, if they differ, open with the new values and
    // remember them for the next comparison.
    if (!(leftIndent == oldLeftIndent && rightIndent == oldRightIndent)) {
        openIndentTable(stringbuffer, leftIndent, rightIndent);
        oldLeftIndent = leftIndent;
        oldRightIndent = rightIndent;
    }
}
/**
* Starts an indent table in the output buffer when either indent is non-zero.
*
*@param stringbuffer output buffer that receives the markup
*@param d left indent (updateState passes leftIndent)
*@param d1 right indent (updateState passes rightIndent)
*/
private void openIndentTable(
StringBuffer stringbuffer,
double d,
double d1) {
if (d != 0.0D || d1 != 0.0D) {
closeSubsetTags(stringbuffer);
stringbuffer.append("<table><tr>");
String s = getSpaceTab((int) (d / 4D));
if (s.length() > 0) {
stringbuffer.append("<td>" + s + "</td>");