Java 将 Word 转换为 HTML

package com.lmt.service.file;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import java.util.UUID;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;

import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;

@Componentpublic classWordToHtml {private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);

@AutowiredprivateParseFile parseFile;publicFile convert(MultipartFile file) {

String filename=file.getOriginalFilename();

String suffix=filename.substring(filename.lastIndexOf("."));

String newName=UUID.randomUUID().toString();//TODO 需要保存在一个新的位置

File convFile = new File("D:/test/" + newName +suffix);

FileOutputStream fos= null;try{

convFile.createNewFile();

fos= newFileOutputStream(convFile);

fos.write(file.getBytes());

}catch(IOException ex) {

logger.error("上传文件出错!", ex);return null;

}finally{

IOUtils.closeQuietly(fos);

}//输入文件名的所在文件夹//加上反斜杠

String parentDirectory =convFile.getParent();if (!parentDirectory.endsWith("\\")) {

parentDirectory= parentDirectory + "\\";

}if (filename.endsWith(".docx")) {returndocxConvert(parentDirectory, convFile.getAbsolutePath(),newName);

}else if (filename.endsWith(".doc")) {returndocConvert(parentDirectory, convFile.getAbsolutePath(),newName);

}else{

logger.error("不支持的文件格式!");return null;

}

}privateFile docxConvert(String parentDirectory, String filename,String newName) {try{

XWPFDocument document= new XWPFDocument(newFileInputStream(filename));

XHTMLOptions options= XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);

FileOutputStream out= new FileOutputStream(new File(parentDirectory + newName+ ".html"));

XHTMLConverter.getInstance().convert(document, out, options);return new File(parentDirectory + newName+ ".html");

}catch(IOException ex) {

logger.error("word转化出错!", ex);return null;

}

}privateFile docConvert(String parentDirectory, String filename,String newName) {try{

HWPFDocument document= new HWPFDocument(newFileInputStream(filename));

WordToHtmlConverter wordToHtmlConverter= newWordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());//converter默认对图片不作处理,需要手动下载图片并嵌入到html中

wordToHtmlConverter.setPicturesManager(newPicturesManager() {

@Overridepublic String savePicture(byte[] bytes, PictureType pictureType, String s, float v, floatv1) {

String imageFilename= parentDirectory + "";

String identity=UUID.randomUUID().toString();

File imageFile= new File(imageFilename, identity+s);

imageFile.getParentFile().mkdirs();

InputStream in= null;

FileOutputStream out= null;try{

in= newByteArrayInputStream(bytes);

out= newFileOutputStream(imageFile);

IOUtils.copy(in, out);

}catch(IOException ex) {

logger.error("word转化出错!", ex);

}finally{if (in != null) {

IOUtils.closeQuietly(in);

}if (out != null) {

IOUtils.closeQuietly(out);

}

}returnimageFile.getName();

}

});

wordToHtmlConverter.processDocument(document);

Document htmlDocument=wordToHtmlConverter.getDocument();

ByteArrayOutputStream out= newByteArrayOutputStream();

DOMSource domSource= newDOMSource(htmlDocument);

StreamResult streamResult= newStreamResult(out);

TransformerFactory tf=TransformerFactory.newInstance();

Transformer serializer=tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING,"UTF-8");

serializer.setOutputProperty(OutputKeys.INDENT,"yes");

serializer.setOutputProperty(OutputKeys.METHOD,"html");

serializer.transform(domSource, streamResult);

out.close();

String result= newString(out.toByteArray());

FileWriter writer= new FileWriter(parentDirectory + newName + ".html");

writer.write(result);

writer.close();

}catch (IOException | TransformerException |ParserConfigurationException ex) {

logger.error("word转化出错!", ex);

}return new File(parentDirectory + newName + ".html");

}/*** 将上传的Word文档转化成HTML字符串

*@paramattachfile

*@return

*/

publicString convertToHtml(MultipartFile attachfile) {

String wordContent= "";//将Word文件转换为html

File file =convert(attachfile);//读取html文件

if (file != null) {

wordContent=parseFile.readHtml(file);

}returnwordContent;

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值