项目上需要实现一个功能:把 Word 文档转换为 HTML。Word 有 doc 和 docx 两种格式。在实现过程中遇到了一个比较棘手的问题,程序一直报
java.lang.NoClassDefFoundError: org/openxmlformats/schemas/wordprocessingml/x2006/main/FontsDocument$Factory 这样的错误。根据错误提示可以判断是 jar 包的问题。我在网上搜索了相关的博客文章,换了好几个版本的 POI,问题依然存在。后来看到有人提示需要引用完整版的 ooxml-schemas.jar 包,于是就去搜索 ooxml-schemas-1.1.jar,但是很难找到。最后还是在之前的项目里找到了该 jar 包,引入之后再执行,问题完美解决。
在这里把实现该功能的代码及所需的 jar 包分享给有需要的人。
/**
 * Converts the binary Word file {@code F:/test/123.doc} into
 * {@code F:/test/123.html}.
 *
 * <p>Pictures handed to the converter's {@code PicturesManager} are written to
 * {@code F:/test/image/}; the directory is created when it does not exist.
 * The HTML is serialized as UTF-8 with indentation enabled.
 *
 * <p>All streams opened here are closed even when the conversion fails.
 *
 * @throws Exception if the source file cannot be read, the document cannot be
 *         converted, or the HTML file cannot be written
 */
public void docToHtml() throws Exception {
    // Pictures found in the .doc file while converting are saved under this path.
    final String imagepath = "F:/test/image/";
    String filepath = "F:/test/";
    String fileName = "123.doc";
    String htmlName = "123.html";
    final String file = filepath + fileName;

    // The input stream stays open for the whole conversion and is closed on every exit path.
    try (InputStream input = new FileInputStream(new File(file))) {
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Tell the converter where to store pictures.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                    float widthInches, float heightInches) {
                File imageDir = new File(imagepath);
                if (!imageDir.exists()) {
                    // Create the picture directory on first use.
                    imageDir.mkdirs();
                }
                File pictureFile = new File(imagepath + suggestedName);
                try (OutputStream os = new FileOutputStream(pictureFile)) {
                    os.write(content);
                } catch (IOException e) {
                    // Best effort: a picture that cannot be saved must not abort the
                    // conversion (this callback cannot throw checked exceptions).
                    e.printStackTrace();
                }
                // The returned string is the picture path handed back to the converter.
                return imagepath + suggestedName;
            }
        });

        // Parse the Word document into the converter's DOM.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        File htmlFile = new File(filepath + htmlName);
        try (OutputStream outStream = new FileOutputStream(htmlFile)) {
            DOMSource domSource = new DOMSource(htmlDocument);
            StreamResult streamResult = new StreamResult(outStream);
            TransformerFactory factory = TransformerFactory.newInstance();
            Transformer serializer = factory.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(domSource, streamResult);
        }
    }
}
/**
 * Converts the OOXML Word file {@code F:/test/123.docx} into
 * {@code F:/test/1234.html}.
 *
 * <p>Prints a message and returns when the source file does not exist.
 * Returns silently when the file name does not end in {@code .docx} or
 * {@code .DOCX}. The same folder ({@code F:/test/}) is passed to both the
 * URI resolver and the image extractor.
 *
 * <p>Both the input and the output stream are closed even when the
 * conversion fails.
 *
 * @throws Exception if the source file cannot be read, the document cannot be
 *         converted, or the HTML file cannot be written
 */
public void docxToHtml() throws Exception {
    String filepath = "F:/test/";
    String fileName = "123.docx";
    String htmlName = "1234.html";
    final String file = filepath + fileName;
    File f = new File(file);

    if (!f.exists()) {
        System.out.println("Sorry File does not Exists!");
        return;
    }
    if (!(f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX"))) {
        // Not a .docx file: nothing to convert.
        return;
    }

    // Load the Word document into an XWPFDocument; the stream is closed on every exit path.
    try (InputStream in = new FileInputStream(f)) {
        XWPFDocument document = new XWPFDocument(in);

        // XHTML conversion options: the URI resolver and the image extractor
        // are both pointed at the same folder.
        File imageFolderFile = new File(filepath);
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
        options.setExtractor(new FileImageExtractor(imageFolderFile));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // Convert the XWPFDocument to XHTML and write it to the target file.
        try (OutputStream out = new FileOutputStream(new File(filepath + htmlName))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}
链接: https://pan.baidu.com/s/1rGOr_ZFdMoIxlC02oXKo9g 提取码: 9a64