转载请注明出处。原文地址:https://blog.csdn.net/qq_39098505/article/details/81541191
doc转html获取带样式内容,并在ueditor中显示
工具类:
获取返回的内容,存到数据库。
package com.wb.core.utils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
/**
 * Converts Microsoft Word files to HTML markup that keeps the document's styling.
 *
 * <p>{@code .doc} files are read with {@link HWPFDocument} and rendered by
 * {@link WordToHtmlConverter}; {@code .docx} files are read with {@link XWPFDocument}
 * and rendered by {@link XHTMLConverter}. In both cases the typographic quotes and the
 * em dash are replaced by their ASCII counterparts before the markup is returned
 * (the original author notes that UEditor does not handle those characters).
 */
public class DocToHtml {

    /** File extension routed to {@link #convert2Html(String)}. */
    private static final String DOC_EXTENSION = ".doc";

    /** File extension routed to {@link #docxToHtml(String)}. */
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Demo entry point: converts two sample files and prints the resulting HTML.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            String content = wordToHtml("D:\\a.doc");
            System.out.println(content);
            String content1 = wordToHtml("D:\\b.docx");
            System.out.println(content1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a Word file to HTML, choosing the converter from the file extension.
     * The extension is matched case-insensitively.
     *
     * @param filePath path of the {@code .doc} or {@code .docx} file
     * @return the HTML markup, or {@code null} when the extension is neither
     *         {@code .doc} nor {@code .docx}
     * @throws IllegalArgumentException if {@code filePath} is {@code null}
     * @throws Exception if reading or converting the file fails
     */
    public static String wordToHtml(String filePath) throws Exception {
        if (filePath == null) {
            throw new IllegalArgumentException("filePath must not be null");
        }
        if (hasExtension(filePath, DOC_EXTENSION)) {
            return convert2Html(filePath);
        }
        if (hasExtension(filePath, DOCX_EXTENSION)) {
            return docxToHtml(filePath);
        }
        return null;
    }

    /**
     * Converts a {@code .docx} file to HTML.
     *
     * <p>Besides returning the markup, this method writes it to an {@code .html} file
     * that sits next to the source file and shares its base name.
     *
     * @param sourceFileName path of the {@code .docx} file
     * @return the HTML markup with typographic quotes and em dashes replaced
     * @throws Exception if the file cannot be read, converted or written
     */
    public static String docxToHtml(String sourceFileName) throws Exception {
        // Convert once into memory; the same bytes feed both the file and the return value.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (InputStream in = new FileInputStream(sourceFileName)) {
            XWPFDocument document = new XWPFDocument(in);
            XHTMLOptions options = XHTMLOptions.create().indent(4);
            XHTMLConverter.getInstance().convert(document, buffer, options);
        }
        byte[] htmlBytes = buffer.toByteArray();

        File outFile = new File(htmlPathFor(sourceFileName));
        File parentDir = outFile.getParentFile();
        // getParentFile() is null for a bare file name such as "b.docx".
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        try (OutputStream out = new FileOutputStream(outFile)) {
            out.write(htmlBytes);
        }

        // NOTE(review): the bytes are decoded with the platform default charset, as before.
        // Which charset XHTMLConverter uses when writing to an OutputStream is not visible
        // here - confirm it before pinning an explicit charset.
        String content = new String(htmlBytes);
        return normalizePunctuation(content);
    }

    /**
     * Converts a {@code .doc} file to HTML.
     *
     * @param fileName path of the {@code .doc} file
     * @return the HTML markup with typographic quotes and em dashes replaced
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if the DOM cannot be serialized to HTML
     */
    public static String convert2Html(String fileName)
            throws TransformerException, IOException,
            ParserConfigurationException {
        Document htmlDocument;
        try (InputStream in = new FileInputStream(fileName)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            wordToHtmlConverter.processDocument(wordDocument);
            htmlDocument = wordToHtmlConverter.getDocument();
        }

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "HTML");

        // Serialize straight to characters. Going through a byte array and decoding it
        // with the platform default charset garbles the text whenever that default
        // differs from the encoding requested above.
        StringWriter html = new StringWriter();
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(html));
        return normalizePunctuation(html.toString());
    }

    /** Returns whether {@code path} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String path, String extension) {
        int offset = path.length() - extension.length();
        return offset >= 0
                && path.regionMatches(true, offset, extension, 0, extension.length());
    }

    /**
     * Builds the path of the {@code .html} file written next to {@code sourceFileName}:
     * the extension of the file name is replaced, or {@code .html} is appended when the
     * file name has no extension. Dots in directory names are left alone.
     */
    private static String htmlPathFor(String sourceFileName) {
        int lastDot = sourceFileName.lastIndexOf('.');
        int lastSeparator = Math.max(
                sourceFileName.lastIndexOf('/'), sourceFileName.lastIndexOf('\\'));
        String basePath = lastDot > lastSeparator
                ? sourceFileName.substring(0, lastDot)
                : sourceFileName;
        return basePath + ".html";
    }

    /** Replaces typographic double quotes with {@code "} and the em dash with {@code -}. */
    private static String normalizePunctuation(String html) {
        return html
                .replace('\u201C', '"')   // left double quotation mark
                .replace('\u201D', '"')   // right double quotation mark
                .replace('\u2014', '-');  // em dash
    }
}
1.在ueditor.all.js文件内搜索allowDivTransToP,找到该配置项,将其值由true改为false,即不使用默认的过滤处理(默认会过滤掉html、style标签)。
2.在其下方的addInputRule方法中,将switch代码段里的case 'style'和case 'script'分支注释或者删掉,防止UEditor将<html>、<style>等标签转化成其他标签。
3.在ueditor.config.js中添加xss的白名单html,head,body,style,不过滤这些标签,就可以在UEditor上显示样式。
注意:如果还是不显示样式,看下引入的是不是ueditor.all.js。