java rtf 转 html_RTF转HTML,HTML转TXT(Java版)之威力加强版 | 学步园

这个Java程序可以将RTF文件转换为HTML或TXT格式,支持指定输入文件、输出文件和编码方式,并提高了HTML字符反转义的效率。5MB的RTF文件可在4秒内完成转换。
摘要由CSDN通过智能技术生成

增强的功能有:

自由指定要转换的RTF文件和输出文件,输出文件可以不指定

支持HTML与TXT两种格式,默认为TXT

可以指定转换编码,默认为UTF-8

提高HTML字符反转义效率(循环代替递归)

显示执行时间;5MB的RTF文件可在4秒内完成转换

代码如下,展开查看!

import java.io.File;

import java.io.FileWriter;

import java.util.Date;

import java.util.HashMap;

import pt.tumba.parser.rtf.RTF2HTML;

/**

* Convert RTF to HTML, RTF to TXT

*

* @author KNIGHTRCOM(rcom10002@163.com)

* @see <a href="http://blog.csdn.net/rcom10002">http://blog.csdn.net/rcom10002</a>

*/

public class Main {

private static String sourceFilename;

private static String outputFilename;

private static boolean isForced = false; // indicate to overwrite the existing file

private static boolean isSystemListed = false;

private static String type = "txt";

private static String encoding = "UTF-8";

/**

* -i System information (optional)

* -s Source RTF file (mandatory)

* -o Output file name (optional)

* --force Overwrite output file if it exists

*

* @param args

* @throws Exception

*/

public static void main(String[] args) {

try {

if (args != null && args.length > 0) {

for (String arg : args) {

if (arg.equals("-i")) {

isSystemListed = true;

} else if (arg.startsWith("-s")) {

sourceFilename = arg.substring(2);

} else if (arg.startsWith("-o")) {

outputFilename = arg.substring(2);

} else if (arg.equals("--force")) {

isForced = true;

} else if (arg.startsWith("-t")) {

type = arg.substring(2);

if (!"txt".equals(type) && !"html".equals(type)) {

isSystemListed = false;

sourceFilename = null;

break;

}

} else if (arg.startsWith("-e")) {

encoding = arg.substring(2);

} else {

isSystemListed = false;

sourceFilename = null;

break;

}

}

}

if (((sourceFilename == null || !new File(sourceFilename).exists()) && !isSystemListed) ||

args != null && args.length == 1 && args[0].equals("--help")){

System.out.println("usage: java Main [-t] [-eEncodingName] [--force] -sSourceFileName [-oOutputFileName]");

System.out.println(" java Main [-i,--help]");

return;

}

if (isSystemListed) {

listSystemInfo();

}

executeConvertion();

} catch (Exception e) {

System.err.print("Errmsg: " + e.getMessage());

System.exit(1);

}

}

/**

* List the system info you may concern

*/

public static void listSystemInfo() {

System.getProperties().list(System.out);

}

/**

* Convert RTF to required format

*

* @throws Exception

*/

private static void executeConvertion() throws Exception {

long duration = new Date().getTime();

// Convert rtf to HTML

String result = new RTF2HTML().convertRTFToHTML(new File(sourceFilename));

// This step is important for rendering the text with proper encoding

result = new String(result.getBytes(System.getProperty("sun.jnu.encoding")), encoding);

if ("txt".equals(type)) {

// Extract plain text from HTML

result = result.replaceAll("(?i)", "
/n").replaceAll("<.>", "");

result = StringUtils.unescapeHTML(result, 0);

}

// Write the result to the file

if (outputFilename == null) {

outputFilename = sourceFilename.concat(".txt");

}

File outputFile = new File(outputFilename);

if (outputFile.exists() && !isForced) {

System.out.print("Warning: Output file already exists! Try execute with --force.");

System.exit(-1);

}

FileWriter w = new FileWriter(outputFile);

w.write(StringUtils.trimThroughLines(result));

w.close();

duration -= new Date().getTime();

System.out.print("Complete!(" + (duration / 1000 * -1) + "s)");

}

}

/**

* http://www.rgagnon.com/javadetails/java-0307.html

*

*/

class StringUtils {

private StringUtils() {

}

private static HashMap htmlEntities;

static {

htmlEntities = new HashMap();

htmlEntities.put("

htmlEntities.put(">", ">");

htmlEntities.put("&", "&");

htmlEntities.put(""", "/"");

htmlEntities.put("à", "à");

htmlEntities.put("À", "À");

htmlEntities.put("â", "â");

htmlEntities.put("ä", "ä");

htmlEntities.put("Ä", "Ä");

htmlEntities.put("Â", "Â");

htmlEntities.put("å", "å");

htmlEntities.put("Å", "Å");

htmlEntities.put("æ", "æ");

htmlEntities.put("Æ", "Æ");

htmlEntities.put("ç", "ç");

htmlEntities.put("Ç", "Ç");

htmlEntities.put("é", "é");

htmlEntities.put("É", "É");

htmlEntities.put("è", "è");

htmlEntities.put("È", "È");

htmlEntities.put("ê", "ê");

htmlEntities.put("Ê", "Ê");

htmlEntities.put("ë", "ë");

htmlEntities.put("Ë", "Ë");

htmlEntities.put("ï", "ï");

htmlEntities.put("Ï", "Ï");

htmlEntities.put("ô", "ô");

htmlEntities.put("Ô", "Ô");

htmlEntities.put("ö", "ö");

htmlEntities.put("Ö", "Ö");

htmlEntities.put("ø", "ø");

htmlEntities.put("Ø", "Ø");

htmlEntities.put("ß", "ß");

htmlEntities.put("ù", "ù");

htmlEntities.put("Ù", "Ù");

htmlEntities.put("û", "û");

htmlEntities.put("Û", "Û");

htmlEntities.put("ü", "ü");

htmlEntities.put("Ü", "Ü");

htmlEntities.put(" ", " ");

htmlEntities.put("©", "/u00a9");

htmlEntities.put("®", "/u00ae");

htmlEntities.put("€", "/u20a0");

}

public static final String unescapeHTML(String source, int start) {

int i, j;

// 将递归

java -Dfile.encoding=UTF-8 -cp "C:/Documents and Settings/Administrator/My Documents/Workspace/eclipse/RTF/Document Parser;" Main -s"C:/Documents and Settings/Administrator/My Documents/Workspace/php eclipse/QAR Tool/questions/sample.rtf" -o"C:/my.txt" --force -eGB2312 -thtml

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值