import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

public class Test {
// Converts a .doc file to HTML.
void docToHtml() throwsException {
String sourceFileName= "C:\\doc\\test.doc";
String targetFileName= "C:\\html\\test.html";
String imagePathStr= "C:\\html\\image\\";
HWPFDocument wordDocument= new HWPFDocument(newFileInputStream(sourceFileName));
Document document=DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter= newWordToHtmlConverter(document);//保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) ->{try(FileOutputStream out = new FileOutputStream(imagePathStr +name)){
out.write(content);
}catch(Exception e) {
e.printStackTrace();
}return "image/" +name;
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument=wordToHtmlConverter.getDocument();
DOMSource domSource= newDOMSource(htmlDocument);
StreamResult streamResult= new StreamResult(newFile(targetFileName));
TransformerFactory tf=TransformerFactory.newInstance();
Transformer serializer=tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING,"utf-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
serializer.setOutputProperty(OutputKeys.METHOD,"html");
serializer.transform(domSource, streamResult);
}//docx转换为html
public void docxToHtml() throwsException {
String sourceFileName= "D:\\ac\\00.docx";
String targetFileName= "D:\\ac\\test.html";
String imagePathStr= "D:\\ac\\image\\";
OutputStreamWriter outputStreamWriter= null;try{
XWPFDocument document= new XWPFDocument(newFileInputStream(sourceFileName));
XHTMLOptions options=XHTMLOptions.create();//存放图片的文件夹
options.setExtractor(new FileImageExtractor(newFile(imagePathStr)));//html中图片的路径
options.URIResolver(new BasicURIResolver("image"));
outputStreamWriter= new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
XHTMLConverter xhtmlConverter=(XHTMLConverter) XHTMLConverter.getInstance();
xhtmlConverter.convert(document, outputStreamWriter, options);
}finally{if (outputStreamWriter != null) {
outputStreamWriter.close();
}
}
}