使用POI将Word文档转换为HTML

原创 2018年04月17日 16:34:33

pom.xml 依赖配置

<!-- Dependencies for the Word-to-HTML sample (Apache POI 3.14 plus the xdocreport XHTML converter). -->
<!-- NOTE(review): this is a fragment; the enclosing <dependencies> element and any parent POM are not shown. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<!-- NOTE(review): xdocreport artifacts are declared at 1.0.6 here and 2.0.1 further down; confirm the mix is intentional. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- NOTE(review): both poi-ooxml-schemas and ooxml-schemas are declared; verify both are needed and do not conflict. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
<!-- NOTE(review): the sample class imports nothing from jacob; presumably left over from another approach; confirm. -->
<dependency>
    <groupId>net.sf.jacob-project</groupId>
    <artifactId>jacob</artifactId>
    <version>1.14.3</version>
</dependency>
<!-- NOTE(review): no <version> is given; presumably managed by a parent POM or dependencyManagement; confirm, otherwise the build will not resolve it. -->
<dependency>
    <groupId>javax.servlet</groupId>
    <artifactId>servlet-api</artifactId>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>com.mchange</groupId>
    <artifactId>mchange-commons-java</artifactId>
    <version>0.2.11</version>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- Matches the org.apache.poi.xwpf.converter.xhtml imports used by the sample class. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- NOTE(review): the sample class imports org.apache.commons.io.FileUtils but no commons-io dependency is listed; presumably transitive; verify. -->

生成代码如下:

import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;

/**
 * Sample command-line tool that converts a Word document to HTML.
 *
 * <p>{@code .docx} files are converted with the xdocreport {@code XHTMLConverter};
 * every other file is treated as a binary {@code .doc} and converted with POI's
 * {@code WordToHtmlConverter}.
 */
public class DocToHtml {

    /** Output directory used by {@link #main(String[])}; HTML and images are written here. */
    public static String path = "D:\\demo\\";

    /** Word document converted by {@link #main(String[])}. */
    public static String file = "D:\\demo\\数据监控采用SQLServer CDC的可行性报告.docx";

    /** Sub-directory of the output directory that receives pictures extracted from .doc files. */
    private static final String IMAGE_DIR_NAME = "image";

    /**
     * Converts {@link #file} into {@link #path}, choosing the converter by file extension.
     *
     * @param args ignored
     * @throws Exception if reading, converting or writing fails
     */
    public static void main(String[] args) throws Exception {
        File source = new File(file);
        if (!source.exists()) {
            System.out.println("Sorry File does not Exists!");
            return;
        }
        if (hasDocxExtension(source.getName())) {
            poiDocxToHtml(file, path);
        } else {
            poiDocToHtml(file, path);
        }
    }

    /**
     * Converts a {@code .docx} document to {@code <baseName>.html} inside {@code path}.
     *
     * <p>Prints a message and returns without doing anything when {@code file} does not exist.
     *
     * @param file path of the source .docx document
     * @param path output directory; also used as the base directory for extracted images
     * @throws IOException if the document cannot be read or the output cannot be written
     */
    public static void poiDocxToHtml(String file, String path) throws IOException {
        File source = new File(file);
        if (!source.exists()) {
            System.out.println("文件不存在!");
            return;
        }

        // A name without a dot is used unchanged instead of failing in substring().
        String fileName = source.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = dot >= 0 ? fileName.substring(0, dot) : fileName;

        File outputDir = new File(path);
        ensureDirectory(outputDir);

        // Image URIs are resolved relative to "./", so the HTML file has to live in the
        // same directory the image extractor uses as its base.
        File htmlFile = new File(outputDir, baseName + ".html");
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("./"));
        options.setExtractor(new FileImageExtractor(outputDir));

        try (InputStream in = new FileInputStream(source);
             OutputStream out = new FileOutputStream(htmlFile)) {
            XWPFDocument document = new XWPFDocument(in);
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }

    /**
     * Converts a binary {@code .doc} document to {@code 1.html} inside {@code path}.
     *
     * <p>Pictures are written to the {@code image} sub-directory of {@code path}, which is
     * the location the generated HTML refers to. The generated HTML is also echoed to
     * standard output.
     *
     * @param file path of the source .doc document
     * @param path output directory
     * @throws IOException if the document cannot be read or the output cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if the HTML DOM cannot be serialized
     */
    public static void poiDocToHtml(String file, String path)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        try (InputStream input = new FileInputStream(file)) {
            wordDocument = new HWPFDocument(input);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // URI placed in the HTML; '/' keeps it a valid relative URL on every platform.
                return IMAGE_DIR_NAME + "/" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        savePictures(wordDocument, new File(path, IMAGE_DIR_NAME));

        String content = serializeHtml(wordToHtmlConverter.getDocument());
        System.out.println(content);
        FileUtils.writeStringToFile(new File(path, "1.html"), content, "utf-8");
    }

    /** Returns whether {@code name} ends with ".docx", ignoring case. */
    private static boolean hasDocxExtension(String name) {
        String extension = ".docx";
        // regionMatches returns false (rather than throwing) when the name is shorter.
        return name.regionMatches(true, name.length() - extension.length(),
                extension, 0, extension.length());
    }

    /** Creates {@code dir} (and parents) if it does not exist yet. */
    private static void ensureDirectory(File dir) throws IOException {
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory: " + dir);
        }
    }

    /** Writes every picture of {@code wordDocument} into {@code imageDir}. */
    private static void savePictures(HWPFDocument wordDocument, File imageDir) throws IOException {
        List<Picture> pictures = wordDocument.getPicturesTable().getAllPictures();
        if (pictures == null || pictures.isEmpty()) {
            return;
        }
        ensureDirectory(imageDir);
        for (Picture picture : pictures) {
            File target = new File(imageDir, picture.suggestFullFileName());
            try (OutputStream imageOut = new FileOutputStream(target)) {
                picture.writeImageContent(imageOut);
            } catch (FileNotFoundException e) {
                // Best effort, as before: one unwritable picture must not abort the conversion.
                System.err.println("Cannot write picture " + target + ": " + e);
            }
        }
    }

    /** Serializes {@code htmlDocument} as indented UTF-8 HTML text. */
    private static String serializeHtml(Document htmlDocument)
            throws TransformerException, IOException {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(buffer));
        return buffer.toString("UTF-8");
    }
}



Word2010 商业文档排版技巧

-
  • 1970年01月01日 08:00

POI把html写入word doc文件

直接把Html文本写入到Word文件 获取查看页面的body内容和引用的css文件路径传入到后台。把对应css文件的内容读取出来。利用body内容和css文件的内容组成一个标准格式的Html文本...
  • lcczzu
  • lcczzu
  • 2015-07-13 14:44:50
  • 1432

java将html导出成word(利用的poi包导出)

//导出到word public void exportWord(ActionMapping actionMapping, ActionForm actionForm, HttpServlet...
  • myfmyfmyfmyf
  • myfmyfmyfmyf
  • 2014-05-12 17:08:44
  • 22305

POI之Word转化为Html-yellowcong

这个将Word转化Html这个功能16年1月份就做出来了,并将这个功能单独封装成了一个Util包,这个类主要用到了`WordToHtmlConverter` 中的`processDocument`函数...
  • yelllowcong
  • yelllowcong
  • 2017-07-16 01:23:04
  • 728

Java使用poi将word转换为html

使用poi将word转换为html,支持doc,docx,转换后可以保持图片、样式 1.导入Maven包 org.apache.poi poi 3.14 org.apache...
  • dong_18383219470
  • dong_18383219470
  • 2016-12-09 13:46:00
  • 2243

poi将html导出到word

import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import...
  • myfmyfmyfmyf
  • myfmyfmyfmyf
  • 2014-05-12 09:09:24
  • 2174

poi实现word转换html兼容03和07

  • 2016年01月20日 09:12
  • 20.1MB
  • 下载

使用poi将word转换为html

 使用poi将word转换为html 使用poi将word转换为html,支持doc,docx,转换后可以保持图片、样式 演示地址: https://www.xiaoyun.stud...
  • qq_22498277
  • qq_22498277
  • 2016-11-22 16:47:05
  • 3506

POI 实现word和html互转

POI转HTML(仅针对doc): package com.vito.demo.test; import java.io.ByteArrayOutputStream; ...
  • princeLuan
  • princeLuan
  • 2017-06-07 14:59:56
  • 832

poi将word转换成html、样式 表格 图片处理

  • 2013年03月04日 10:25
  • 2.5MB
  • 下载
收藏助手
不良信息举报
您举报文章:使用POI将word文档转出HTML
举报原因:
原因补充:

(最多只允许输入30个字)