packagecom.lmt.service.file;importjava.io.ByteArrayInputStream;importjava.io.ByteArrayOutputStream;importjava.io.File;importjava.io.FileInputStream;importjava.io.FileOutputStream;importjava.io.FileWriter;importjava.io.IOException;importjava.io.InputStream;importjava.util.UUID;importjavax.xml.parsers.DocumentBuilderFactory;importjavax.xml.parsers.ParserConfigurationException;importjavax.xml.transform.OutputKeys;importjavax.xml.transform.Transformer;importjavax.xml.transform.TransformerException;importjavax.xml.transform.TransformerFactory;importjavax.xml.transform.dom.DOMSource;importjavax.xml.transform.stream.StreamResult;importorg.apache.poi.hwpf.HWPFDocument;importorg.apache.poi.hwpf.converter.PicturesManager;importorg.apache.poi.hwpf.converter.WordToHtmlConverter;importorg.apache.poi.hwpf.usermodel.PictureType;importorg.apache.poi.util.IOUtils;importorg.apache.poi.xwpf.usermodel.XWPFDocument;importorg.slf4j.Logger;importorg.slf4j.LoggerFactory;importorg.springframework.beans.factory.annotation.Autowired;importorg.springframework.stereotype.Component;importorg.springframework.web.multipart.MultipartFile;importorg.w3c.dom.Document;importfr.opensagres.poi.xwpf.converter.core.ImageManager;importfr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;importfr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
@Componentpublic classWordToHtml {private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);
@AutowiredprivateParseFile parseFile;publicFile convert(MultipartFile file) {
String filename=file.getOriginalFilename();
String suffix=filename.substring(filename.lastIndexOf("."));
String newName=UUID.randomUUID().toString();//TODO 需要保存在一个新的位置
File convFile = new File("D:/test/" + newName +suffix);
FileOutputStream fos= null;try{
convFile.createNewFile();
fos= newFileOutputStream(convFile);
fos.write(file.getBytes());
}catch(IOException ex) {
logger.error("上传文件出错!", ex);return null;
}finally{
IOUtils.closeQuietly(fos);
}//输入文件名的所在文件夹//加上反斜杠
String parentDirectory =convFile.getParent();if (!parentDirectory.endsWith("\\")) {
parentDirectory= parentDirectory + "\\";
}if (filename.endsWith(".docx")) {returndocxConvert(parentDirectory, convFile.getAbsolutePath(),newName);
}else if (filename.endsWith(".doc")) {returndocConvert(parentDirectory, convFile.getAbsolutePath(),newName);
}else{
logger.error("不支持的文件格式!");return null;
}
}privateFile docxConvert(String parentDirectory, String filename,String newName) {try{
XWPFDocument document= new XWPFDocument(newFileInputStream(filename));
XHTMLOptions options= XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);
FileOutputStream out= new FileOutputStream(new File(parentDirectory + newName+ ".html"));
XHTMLConverter.getInstance().convert(document, out, options);return new File(parentDirectory + newName+ ".html");
}catch(IOException ex) {
logger.error("word转化出错!", ex);return null;
}
}privateFile docConvert(String parentDirectory, String filename,String newName) {try{
HWPFDocument document= new HWPFDocument(newFileInputStream(filename));
WordToHtmlConverter wordToHtmlConverter= newWordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());//converter默认对图片不作处理,需要手动下载图片并嵌入到html中
wordToHtmlConverter.setPicturesManager(newPicturesManager() {
@Overridepublic String savePicture(byte[] bytes, PictureType pictureType, String s, float v, floatv1) {
String imageFilename= parentDirectory + "";
String identity=UUID.randomUUID().toString();
File imageFile= new File(imageFilename, identity+s);
imageFile.getParentFile().mkdirs();
InputStream in= null;
FileOutputStream out= null;try{
in= newByteArrayInputStream(bytes);
out= newFileOutputStream(imageFile);
IOUtils.copy(in, out);
}catch(IOException ex) {
logger.error("word转化出错!", ex);
}finally{if (in != null) {
IOUtils.closeQuietly(in);
}if (out != null) {
IOUtils.closeQuietly(out);
}
}returnimageFile.getName();
}
});
wordToHtmlConverter.processDocument(document);
Document htmlDocument=wordToHtmlConverter.getDocument();
ByteArrayOutputStream out= newByteArrayOutputStream();
DOMSource domSource= newDOMSource(htmlDocument);
StreamResult streamResult= newStreamResult(out);
TransformerFactory tf=TransformerFactory.newInstance();
Transformer serializer=tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING,"UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
serializer.setOutputProperty(OutputKeys.METHOD,"html");
serializer.transform(domSource, streamResult);
out.close();
String result= newString(out.toByteArray());
FileWriter writer= new FileWriter(parentDirectory + newName + ".html");
writer.write(result);
writer.close();
}catch (IOException | TransformerException |ParserConfigurationException ex) {
logger.error("word转化出错!", ex);
}return new File(parentDirectory + newName + ".html");
}/*** 将上传的Word文档转化成HTML字符串
*@paramattachfile
*@return
*/
publicString convertToHtml(MultipartFile attachfile) {
String wordContent= "";//将Word文件转换为html
File file =convert(attachfile);//读取html文件
if (file != null) {
wordContent=parseFile.readHtml(file);
}returnwordContent;
}
}