环境
poi-3.9
代码
public class WordToHtml {
private static final String encoding = "UTF-8";
public static String convert2Html(String wordPath)
throws FileNotFoundException, TransformerException, IOException,
ParserConfigurationException {
if( wordPath == null || "".equals(wordPath) ) return "";
File file = new File(wordPath);
if( file.exists() && file.isFile() )
return convert2Html(new FileInputStream(file));
else
return "";
}
public static String convert2Html(InputStream is)
throws TransformerException, IOException,
ParserConfigurationException {
HWPFDocument wordDocument = new HWPFDocument(is);
WordToHtmlConverter converter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
// 添加图片前缀,以防图片重复覆盖
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
final String prefix = sdf.format(new Date());
converter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) {
return prefix + "_" + suggestedName;
}
});
converter.processDocument(wordDocument);
List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for(Picture pic : pics) {
try {
pic.writeImageContent(new FileOutputStream(
"/" + prefix + "_" + pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
StringWriter writer = new StringWriter();
Transformer serializer = TransformerFactory.newInstance().newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(
new DOMSource(converter.getDocument()),
new StreamResult(writer) );
writer.close();
return writer.toString();
}
}