Java 将 Word 转换为 HTML

最新推荐文章于 2024-07-22 17:48:37 发布

张小放

最新推荐文章于 2024-07-22 17:48:37 发布

阅读量91

点赞数

文章标签： java word html

本文链接：https://blog.csdn.net/weixin_35110266/article/details/114475858

版权

package com.lmt.service.file;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.UUID;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;

@Componentpublic classWordToHtml {private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);

@AutowiredprivateParseFile parseFile;publicFile convert(MultipartFile file) {

String filename=file.getOriginalFilename();

String suffix=filename.substring(filename.lastIndexOf("."));

String newName=UUID.randomUUID().toString();//TODO 需要保存在一个新的位置

File convFile = new File("D:/test/" + newName +suffix);

FileOutputStream fos= null;try{

convFile.createNewFile();

fos= newFileOutputStream(convFile);

fos.write(file.getBytes());

}catch(IOException ex) {

logger.error("上传文件出错！", ex);return null;

}finally{

IOUtils.closeQuietly(fos);

}//输入文件名的所在文件夹//加上反斜杠

String parentDirectory =convFile.getParent();if (!parentDirectory.endsWith("\\")) {

parentDirectory= parentDirectory + "\\";

}if (filename.endsWith(".docx")) {returndocxConvert(parentDirectory, convFile.getAbsolutePath(),newName);

}else if (filename.endsWith(".doc")) {returndocConvert(parentDirectory, convFile.getAbsolutePath(),newName);

}else{

logger.error("不支持的文件格式！");return null;

}

}privateFile docxConvert(String parentDirectory, String filename,String newName) {try{

XWPFDocument document= new XWPFDocument(newFileInputStream(filename));

XHTMLOptions options= XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);

FileOutputStream out= new FileOutputStream(new File(parentDirectory + newName+ ".html"));

XHTMLConverter.getInstance().convert(document, out, options);return new File(parentDirectory + newName+ ".html");

}catch(IOException ex) {

logger.error("word转化出错！", ex);return null;

}

}privateFile docConvert(String parentDirectory, String filename,String newName) {try{

HWPFDocument document= new HWPFDocument(newFileInputStream(filename));

WordToHtmlConverter wordToHtmlConverter= newWordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());//converter默认对图片不作处理，需要手动下载图片并嵌入到html中

wordToHtmlConverter.setPicturesManager(newPicturesManager() {

@Overridepublic String savePicture(byte[] bytes, PictureType pictureType, String s, float v, floatv1) {

String imageFilename= parentDirectory + "";

String identity=UUID.randomUUID().toString();

File imageFile= new File(imageFilename, identity+s);

imageFile.getParentFile().mkdirs();

InputStream in= null;

FileOutputStream out= null;try{

in= newByteArrayInputStream(bytes);

out= newFileOutputStream(imageFile);

IOUtils.copy(in, out);

}catch(IOException ex) {

logger.error("word转化出错！", ex);

}finally{if (in != null) {

IOUtils.closeQuietly(in);

}if (out != null) {

IOUtils.closeQuietly(out);

}

}returnimageFile.getName();

}

});

wordToHtmlConverter.processDocument(document);

Document htmlDocument=wordToHtmlConverter.getDocument();

ByteArrayOutputStream out= newByteArrayOutputStream();

DOMSource domSource= newDOMSource(htmlDocument);

StreamResult streamResult= newStreamResult(out);

TransformerFactory tf=TransformerFactory.newInstance();

Transformer serializer=tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING,"UTF-8");

serializer.setOutputProperty(OutputKeys.INDENT,"yes");

serializer.setOutputProperty(OutputKeys.METHOD,"html");

serializer.transform(domSource, streamResult);

out.close();

String result= newString(out.toByteArray());

FileWriter writer= new FileWriter(parentDirectory + newName + ".html");

writer.write(result);

writer.close();

}catch (IOException | TransformerException |ParserConfigurationException ex) {

logger.error("word转化出错！", ex);

}return new File(parentDirectory + newName + ".html");

}/*** 将上传的Word文档转化成HTML字符串

*@paramattachfile

*@return

publicString convertToHtml(MultipartFile attachfile) {

String wordContent= "";//将Word文件转换为html

File file =convert(attachfile);//读取html文件

if (file != null) {

wordContent=parseFile.readHtml(file);

}returnwordContent;

}

张小放

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫