package com.cloudsky.utils;
import cn.hutool.core.io.FileTypeUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.IdUtil;
import com.cloudsky.config.Config;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.stereotype.Component;
import org.springframework.util.ResourceUtils;
import javax.annotation.Resource;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
 * Utility class that converts Word files (.doc / .docx) to HTML.
 *
 * <p>Two families of entry points exist:
 * <ul>
 *   <li>"upload" conversion ({@link #getHtmlStr(String, String)}): output goes under
 *       {@code <uploadPath>tmp/html/} and extracted images under {@code <uploadPath>tmp/image/};</li>
 *   <li>"preview" conversion ({@link #getHtmlStr2(String)}, {@link #docToHtml(String)},
 *       {@link #docxToHtml(String)}): output goes under
 *       {@code <user.dir>/cloudshop-product/doc/html/} and images under
 *       {@code <user.dir>/cloudshop-product/doc/image/}.</li>
 * </ul>
 *
 * @author
 * @since 2021-06-18
 */
@Component
public class WordToHtml {

    /** Directory name (relative to the upload path) used by the upload conversions. */
    private static final String UPLOAD_DIR = "tmp";

    /** Directory name (relative to the preview base directory) used by the preview conversions. */
    private static final String PREVIEW_DIR = "doc";

    /** Name of the sub-directory that receives extracted images. */
    private static final String IMAGE_DIR = "image";

    /** Name of the sub-directory that receives the generated HTML files. */
    private static final String HTML_DIR = "html";

    /** Injected by the container; not referenced by the static helpers of this class. */
    @Resource
    private Config configUtil;

    /**
     * Converts an uploaded .doc or .docx file to HTML and returns the HTML text.
     *
     * <p>The file type is detected from the file itself; anything not detected as {@code "doc"}
     * is handled as .docx.
     *
     * @param sourceFilePath path of the Word file to convert
     * @param uploadPath     base directory for the generated files; it is concatenated directly
     *                       with {@code "tmp"}, so it is expected to end with a path separator
     * @return the generated HTML with line terminators removed, or an empty string when the
     *         conversion fails (the failure is printed to standard error)
     */
    public static String getHtmlStr(String sourceFilePath, String uploadPath) {
        File file = FileUtil.file(sourceFilePath);
        String type = FileTypeUtil.getType(file);
        String targetFileStr;
        try {
            if ("doc".equals(type)) {
                targetFileStr = docToHtml(sourceFilePath, uploadPath);
            } else {
                targetFileStr = docxToHtml(sourceFilePath, uploadPath);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
        return readfile(targetFileStr);
    }

    /**
     * Converts a .doc or .docx file to HTML for direct preview and returns the HTML text.
     *
     * <p>The file type is detected from the file itself; anything not detected as {@code "doc"}
     * is handled as .docx. Output is written below the preview base directory (see the class
     * documentation).
     *
     * @param sourceFilePath path of the Word file to convert
     * @return the generated HTML with line terminators removed, or an empty string when the
     *         conversion fails (the failure is printed to standard error)
     */
    public static String getHtmlStr2(String sourceFilePath) {
        File file = FileUtil.file(sourceFilePath);
        String type = FileTypeUtil.getType(file);
        String targetFileStr;
        try {
            if ("doc".equals(type)) {
                targetFileStr = docToHtml(sourceFilePath);
            } else {
                targetFileStr = docxToHtml(sourceFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
        return readfile(targetFileStr);
    }

    /**
     * Converts an uploaded .doc file to HTML.
     *
     * @param sourceFileUrl path of the .doc file
     * @param uploadPath    base directory for the generated files (expected to end with a separator)
     * @return path of the generated HTML file, named with a random UUID
     * @throws Exception if the document cannot be read or the HTML cannot be written
     */
    private static String docToHtml(String sourceFileUrl, String uploadPath) throws Exception {
        String baseDir = uploadPath + UPLOAD_DIR + File.separator;
        String imagePathStr = baseDir + IMAGE_DIR + File.separator;
        String targetFileName = baseDir + HTML_DIR + File.separator + IdUtil.simpleUUID() + ".html";
        return convertDoc(sourceFileUrl, imagePathStr, UPLOAD_DIR + File.separator + IMAGE_DIR, targetFileName);
    }

    /**
     * Converts an uploaded .docx file to HTML.
     *
     * @param sourceFileUrl path of the .docx file
     * @param uploadPath    base directory for the generated files (expected to end with a separator)
     * @return path of the generated HTML file, named with a random UUID
     * @throws Exception if the document cannot be read or the HTML cannot be written
     */
    private static String docxToHtml(String sourceFileUrl, String uploadPath) throws Exception {
        String baseDir = uploadPath + UPLOAD_DIR + File.separator;
        String imagePathStr = baseDir + IMAGE_DIR + File.separator;
        String targetFileName = baseDir + HTML_DIR + File.separator + IdUtil.simpleUUID() + ".html";
        return convertDocx(sourceFileUrl, imagePathStr, UPLOAD_DIR + File.separator + IMAGE_DIR, targetFileName);
    }

    /**
     * Converts a .doc file to HTML for direct preview.
     *
     * <p>The output file is named after the part of the source file name that precedes its first
     * dot, so sources sharing that prefix overwrite each other's output.
     *
     * @param sourceFileUrl path of the .doc file
     * @return path of the generated HTML file
     * @throws Exception if the document cannot be read or the HTML cannot be written
     */
    public static String docToHtml(String sourceFileUrl) throws Exception {
        String baseDir = previewBaseDir();
        String imagePathStr = baseDir + IMAGE_DIR + File.separator;
        String targetFileName = baseDir + HTML_DIR + File.separator + baseName(sourceFileUrl) + ".html";
        return convertDoc(sourceFileUrl, imagePathStr, PREVIEW_DIR + File.separator + IMAGE_DIR, targetFileName);
    }

    /**
     * Converts a .docx file to HTML for direct preview.
     *
     * <p>The output file is named after the part of the source file name that precedes its first
     * dot, so sources sharing that prefix overwrite each other's output.
     *
     * @param sourceFileUrl path of the .docx file
     * @return path of the generated HTML file
     * @throws Exception if the document cannot be read or the HTML cannot be written
     */
    public static String docxToHtml(String sourceFileUrl) throws Exception {
        String baseDir = previewBaseDir();
        String imagePathStr = baseDir + IMAGE_DIR + File.separator;
        String targetFileName = baseDir + HTML_DIR + File.separator + baseName(sourceFileUrl) + ".html";
        return convertDocx(sourceFileUrl, imagePathStr, PREVIEW_DIR + File.separator + IMAGE_DIR, targetFileName);
    }

    /** Returns the preview base directory, {@code <user.dir>/cloudshop-product/doc/}, with a trailing separator. */
    private static String previewBaseDir() {
        return System.getProperty("user.dir") + File.separator + "cloudshop-product/" + File.separator
                + PREVIEW_DIR + File.separator;
    }

    /** Returns the part of the (trimmed) path's file name that precedes its first dot. */
    private static String baseName(String sourceFileUrl) {
        File tempFile = new File(sourceFileUrl.trim());
        return tempFile.getName().split("\\.")[0];
    }

    /** Creates the directory that will contain {@code targetFileName}, failing loudly if that is impossible. */
    private static void ensureParentDirectory(String targetFileName) throws IOException {
        File parent = new File(targetFileName).getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Unable to create output directory: " + parent);
        }
    }

    /**
     * Converts a .doc file to an HTML file, saving embedded pictures into {@code imageDir}.
     *
     * @param sourceFile     path of the .doc file
     * @param imageDir       directory (with trailing separator) that receives extracted pictures
     * @param imageUriPrefix prefix used for picture references inside the generated HTML
     * @param targetFile     path of the HTML file to write
     * @return {@code targetFile}
     */
    private static String convertDoc(String sourceFile, String imageDir, String imageUriPrefix, String targetFile)
            throws IOException, ParserConfigurationException, TransformerException {
        File imageFolder = new File(imageDir);
        if (!imageFolder.exists()) {
            // Result intentionally ignored: picture saving is best-effort (see the pictures manager below).
            imageFolder.mkdirs();
        }
        ensureParentDirectory(targetFile);

        // The source stream stays open for the whole conversion and is always closed afterwards.
        try (InputStream in = new FileInputStream(sourceFile)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            org.w3c.dom.Document document =
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
            // Save each picture and return the relative path that the HTML will reference.
            wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
                try (FileOutputStream out = new FileOutputStream(imageDir + name)) {
                    out.write(content);
                } catch (Exception e) {
                    // Best-effort: a picture that cannot be saved must not abort the conversion.
                    e.printStackTrace();
                }
                return imageUriPrefix + File.separator + name;
            });
            wordToHtmlConverter.processDocument(wordDocument);

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            // Write through an explicitly managed stream so the file handle is released deterministically.
            try (OutputStream out = new FileOutputStream(targetFile)) {
                serializer.transform(new DOMSource(wordToHtmlConverter.getDocument()), new StreamResult(out));
            }
        }
        return targetFile;
    }

    /**
     * Converts a .docx file to an HTML file, extracting embedded images into {@code imageDir}.
     *
     * @param sourceFile     path of the .docx file
     * @param imageDir       directory handed to the image extractor
     * @param imageUriPrefix prefix used for image references inside the generated HTML
     * @param targetFile     path of the HTML file to write
     * @return {@code targetFile}
     * @throws Exception if the document cannot be read or converted (kept broad because the
     *                   converter's exception types are defined by the third-party library)
     */
    private static String convertDocx(String sourceFile, String imageDir, String imageUriPrefix, String targetFile)
            throws Exception {
        ensureParentDirectory(targetFile);

        // The source stream stays open for the whole conversion and is always closed afterwards.
        try (InputStream in = new FileInputStream(sourceFile)) {
            XWPFDocument document = new XWPFDocument(in);
            XHTMLOptions options = XHTMLOptions.create();
            // Folder that receives the extracted images.
            options.setExtractor(new FileImageExtractor(new File(imageDir)));
            // Path used for the images inside the HTML.
            options.URIResolver(new BasicURIResolver(imageUriPrefix));
            // The target file is only created once the document has been loaded successfully.
            try (OutputStreamWriter writer =
                         new OutputStreamWriter(new FileOutputStream(targetFile), StandardCharsets.UTF_8)) {
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, writer, options);
            }
        }
        return targetFile;
    }

    /**
     * Reads a UTF-8 text file and returns its lines concatenated without line terminators.
     *
     * <p>I/O failures are printed to standard error; whatever was read before the failure
     * (possibly nothing) is returned.
     *
     * @param filePath path of the file to read
     * @return the concatenated lines of the file
     */
    private static String readfile(String filePath) {
        StringBuilder content = new StringBuilder();
        // The converters in this class write UTF-8, so the file is decoded as UTF-8 as well.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return content.toString();
    }
}
// Word .doc / .docx to .html utility class, used to implement online preview.
// (Source page note: latest recommended article published 2023-07-20 20:01:44.)