1.pom.xml 主要依赖
<!-- Main dependency: the Apache POI "scratchpad" artifact. The utility class in section 2
     imports org.apache.poi.hwpf.* classes, which are expected to come from this artifact. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.10-FINAL</version>
</dependency>
2.工具类
package com.exam.main;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;
/**
 * Converts a legacy Word {@code .doc} file to HTML with Apache POI's HWPF
 * {@link WordToHtmlConverter}.
 *
 * <p>The generated {@code .html} file and, optionally, the pictures embedded in the document
 * are written to the directory that contains the source file.
 *
 * <p>Created by xin on 14/11/6.
 */
public class Main {

    /** Document converted by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DOC_PATH = "C:\\Users\\xin\\Desktop\\werwr\\1.doc";

    /**
     * Sets a parser feature on a best-effort basis: a failure is reported on
     * {@code System.err} and otherwise ignored, so the factory stays usable.
     *
     * @param documentBuilderFactory factory to configure
     * @param feature                feature URI to set
     * @param enabled                desired state of the feature
     */
    private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) {
        try {
            documentBuilderFactory.setFeature(feature, enabled);
        } catch (Exception e) {
            reportUnsupportedFeature(feature, enabled, e);
        } catch (AbstractMethodError ame) {
            // Presumably raised by parser implementations that do not implement setFeature at all.
            reportUnsupportedFeature(feature, enabled, ame);
        }
    }

    /** Prints which feature could not be applied, together with the cause. */
    private static void reportUnsupportedFeature(String feature, boolean enabled, Throwable cause) {
        System.err.println("Could not set XML parser feature '" + feature + "' to " + enabled + ": " + cause);
    }

    /**
     * Creates a {@link DocumentBuilderFactory} with entity-reference expansion turned off,
     * secure processing requested, and loading of external entities and external DTDs disabled.
     *
     * <p>Each feature is applied on a best-effort basis; an unsupported one is reported and skipped.
     *
     * @return the configured factory
     */
    public static DocumentBuilderFactory getDocumentBuilderFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setExpandEntityReferences(false);
        trySetSAXFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        trySetSAXFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
        trySetSAXFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        return factory;
    }

    /**
     * Returns the name of a file without its extension.
     *
     * <p>Only the text after the last {@code '.'} is removed, so {@code "a.tar.gz"} yields
     * {@code "a.tar"}. A name that contains no {@code '.'} is returned unchanged.
     *
     * @param sourceFile input file
     * @return the file name (last path segment) without its extension
     */
    public static String getFileNameWithoutExtension(File sourceFile) {
        String filename = sourceFile.getName();
        int dot = filename.lastIndexOf('.');
        // lastIndexOf returns -1 when there is no dot; substring(0, -1) would throw.
        return dot < 0 ? filename : filename.substring(0, dot);
    }

    /**
     * Converts a {@code .doc} file to HTML. Only the binary {@code .doc} format is handled here;
     * {@code .docx} would need POI's XWPF classes instead. The HTML file and the pictures
     * (if requested) are saved in the directory that contains the {@code .doc} file.
     *
     * @param docFile input {@code .doc} file
     * @param savePic whether the pictures embedded in the document are saved as files
     * @throws Exception if the document cannot be read, converted, or written
     */
    public static void docToHtml(final File docFile, boolean savePic) throws Exception {
        // Resolve through the absolute path: getParent() is null for a bare relative name such as "1.doc".
        File outputDir = docFile.getAbsoluteFile().getParentFile();

        HWPFDocument hwpfDocument = loadDocument(docFile);
        Document newDocument = getDocumentBuilderFactory().newDocumentBuilder().newDocument();
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(newDocument);
        if (savePic) {
            savePictures(hwpfDocument, outputDir);
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                // The picture files are already on disk; only the name is handed back to the
                // converter, presumably to be used as the image reference in the HTML.
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                    return suggestedName;
                }
            });
        }
        wordToHtmlConverter.processDocument(hwpfDocument);

        File htmlFile = new File(outputDir, getFileNameWithoutExtension(docFile) + ".html");
        writeHtml(wordToHtmlConverter.getDocument(), htmlFile);
    }

    /** Reads the Word document from disk, always closing the input stream afterwards. */
    private static HWPFDocument loadDocument(File docFile) throws IOException {
        InputStream in = new FileInputStream(docFile);
        try {
            // NOTE(review): assumes POI has consumed the whole stream once the constructor returns.
            return new HWPFDocument(in);
        } finally {
            in.close();
        }
    }

    /** Writes every picture of the document into {@code outputDir} under its suggested file name. */
    private static void savePictures(HWPFDocument hwpfDocument, File outputDir) throws IOException {
        List<Picture> pics = hwpfDocument.getPicturesTable().getAllPictures();
        if (pics == null) {
            return;
        }
        for (Picture pic : pics) {
            OutputStream out = new FileOutputStream(new File(outputDir, pic.suggestFullFileName()));
            try {
                pic.writeImageContent(out);
            } finally {
                out.close();
            }
        }
    }

    /** Serialises the converted DOM as indented UTF-8 HTML into {@code htmlFile}. */
    private static void writeHtml(Document htmlDocument, File htmlFile)
            throws IOException, javax.xml.transform.TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        transformer.setOutputProperty(OutputKeys.METHOD, "html");
        OutputStream out = new FileOutputStream(htmlFile);
        try {
            // Writing to a byte stream lets the transformer apply the declared encoding itself,
            // instead of going through a String and the platform default charset.
            transformer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /**
     * Converts one document and saves its pictures.
     *
     * @param args optional; {@code args[0]} is the path of the {@code .doc} file to convert.
     *             Without it the built-in default path is used.
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DOC_PATH;
        docToHtml(new File(path), true);
    }
}