POM.XML配置文件
<!--wordToHtml-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>net.sf.jacob-project</groupId>
<artifactId>jacob</artifactId>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.mchange</groupId>
<artifactId>mchange-commons-java</artifactId>
<version>0.2.11</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
生成代码如下:
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;
public class DocToHtml {
public static String path="D:\\demo\\";
public static String file="D:\\demo\\数据监控采用SQLServer CDC的可行性报告.docx";
public static void main(String[] args) throws Exception {
// String path = "D:\\demo\\";
// String file = "D:\\demo\\数据监控采用SQLServer CDC的可行性报告.docx";
File f = new File(file);
if (!f.exists()) {
System.out.println("Sorry File does not Exists!");
} else {
if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {
poiDocxToHtml(file, path);
} else {
poiDocToHtml(file, path);
}
}
}
public static void poiDocxToHtml(String file, String path) throws IOException {
// String file2 = "D:\\demo\\数据监控采用SQLServer CDC的可行性报告.html";
String fileName;
File f = new File(file);
if(f.exists()){
fileName=f.getName();
fileName=fileName.substring(0,fileName.lastIndexOf("."));
}else{
System.out.println("文件不存在!");
return;
}
String file2=fileName+".html";
//读取文档内容
InputStream in = new FileInputStream(f);
XWPFDocument document = new XWPFDocument(in);
File imageFolderFile = new File(path);
//加载html页面时图片路径
XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("./"));
//图片保存文件夹路径
options.setExtractor(new FileImageExtractor(imageFolderFile));
OutputStream out = new FileOutputStream(new File(file2));
XHTMLConverter.getInstance().convert(document, out, options);
out.close();
}
public static void poiDocToHtml(String file, String path) throws IOException, ParserConfigurationException, TransformerException {
InputStream input = new FileInputStream(file);
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
@Override
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) { //图片在html页面加载路径
return "image\\" + suggestedName;
}
});
wordToHtmlConverter.processDocument(wordDocument);
//获取文档中所有图片
List pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {//图片保存在文件夹的路径
pic.writeImageContent(new FileOutputStream(path
+ pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
//创建html页面并将文档中内容写入页面
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
String content = new String(outStream.toString("UTF-8"));
System.out.println(content);
FileUtils.writeStringToFile(new File(path, "1.html"), content, "utf-8");
}
}