转载文章来源:https://www.cnblogs.com/always-online/p/4800131.html#commentform
maven依赖 这里要注意的是依赖之间应保持版本一致,否则会导致包冲突。小白的我因此困扰挺久。
<dependencies>
<!--doc2html start-->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>1.0.5</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.5</version>
</dependency>
<!--doc2html end-->
<!--docx2html start-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.13</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.13</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.13</version>
</dependency>
<!--docx2html end-->
</dependencies>
转换代码
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
public class DocTypeConvertImpl {
public DocTypeConvertImpl() {
}
/*
* @param filePath word文档的路径
*
*/
public void docToHtml(String filePath) throws IOException, TransformerException, ParserConfigurationException {
//word文档类型,docx类型为TURE,doc类型为FALSE
boolean docType = FALSE;
//生成html文件存放路径
final String desPath = "D:\\";
//html图片存放路径
final String imagesPath = "D:\\";
//获取文件名称
String fileName = filePath.substring(filePath.lastIndexOf("\\") + 1);
//获取文件前缀名
String name = fileName.substring(0, fileName.lastIndexOf("."));
//判断文件类型
if (fileName.endsWith(".docx") || fileName.endsWith(".DOCX")) docType = TRUE;
//开始转换
if (docType) {//当word文档为docx类型时
File f = new File(filePath);
if (!f.exists()) {
System.out.println("Sorry File does not Exists!");
} else {
if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {
// 1) 加载word文档生成 XWPFDocument对象
InputStream in = new FileInputStream(f);
XWPFDocument document = new XWPFDocument(in);
// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
File imageFolderFile = new File(imagesPath);
XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
options.setExtractor(new FileImageExtractor(imageFolderFile));
options.setIgnoreStylesIfUnused(false);
options.setFragment(true);
// 3) 将 XWPFDocument转换成XHTML
OutputStream out = new FileOutputStream(new File(desPath + name + ".html"));
XHTMLConverter.getInstance().convert(document, out, options);
out.close();
}
}
} else if (!docType) { //当word文档为doc类型时
InputStream input = new FileInputStream(new File(filePath));
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
//设置图片存放的位置
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
File imgPath = new File(imagesPath);
if (!imgPath.exists()) {//图片目录不存在则创建
imgPath.mkdirs();
}
File file = new File(imagesPath + suggestedName);
try {
OutputStream os = new FileOutputStream(file);
os.write(content);
os.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return imagesPath + suggestedName;
}
});
//解析word文档
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
File htmlFile = new File(desPath + name+".html");
OutputStream outStream = new FileOutputStream(htmlFile);
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory factory = TransformerFactory.newInstance();
Transformer serializer = factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
}
}
}