使用POI将Word文档转换为HTML

POM.XML配置文件

<!-- Dependencies for the Word-to-HTML converter (DocToHtml).
     NOTE(review): the Java code also imports org.apache.commons.io.FileUtils, but no
     commons-io dependency is declared in this fragment; presumably it is declared
     elsewhere or arrives transitively. Confirm before relying on it. -->

<!-- Apache POI artifacts, all pinned to 3.14. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<!-- NOTE(review): xdocreport artifacts are declared at two different versions in this
     fragment (1.0.6 here and for the xhtml converter, 2.0.1 for
     fr.opensagres.xdocreport.document). Confirm that the mix is intentional. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- NOTE(review): both poi-ooxml-schemas (3.14) and ooxml-schemas (1.3) are declared;
     presumably they contain overlapping classes. Verify that both are really needed. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
<!-- NOTE(review): jacob, servlet-api and mchange-commons-java are not referenced by any
     import of the DocToHtml class shown with this fragment; they may be used elsewhere
     in the project. -->
<dependency>
    <groupId>net.sf.jacob-project</groupId>
    <artifactId>jacob</artifactId>
    <version>1.14.3</version>
</dependency>
<!-- NOTE(review): no <version> element here. This only resolves if the version is
     supplied by a parent POM or a dependencyManagement section; confirm. -->
<dependency>
    <groupId>javax.servlet</groupId>
    <artifactId>servlet-api</artifactId>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>com.mchange</groupId>
    <artifactId>mchange-commons-java</artifactId>
    <version>0.2.11</version>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- Supplies the org.apache.poi.xwpf.converter.xhtml.* classes imported by the code
     (artifact id matches the package name). -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>

生成代码如下:

import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;

/**
 * Converts a Word document to HTML with Apache POI.
 *
 * <p>{@code .docx} files are converted through the xdocreport {@link XHTMLConverter};
 * every other file is treated as a legacy binary {@code .doc} and converted through
 * POI's {@link WordToHtmlConverter}. In both cases the HTML file and the extracted
 * pictures are written below the output directory so that the relative picture links
 * inside the HTML resolve.
 */
public class DocToHtml {

    /** Default output directory used by {@link #main(String[])}. */
    public static String path = "D:\\demo\\";

    /** Default input document used by {@link #main(String[])}. */
    public static String file = "D:\\demo\\数据监控采用SQLServer CDC的可行性报告.docx";

    /** Sub-directory of the output directory that receives pictures of {@code .doc} files. */
    private static final String IMAGE_DIR = "image";

    /**
     * Converts {@link #file} into {@link #path}, choosing the converter by file extension.
     *
     * @param args ignored
     * @throws Exception if reading, converting or writing fails
     */
    public static void main(String[] args) throws Exception {
        File source = new File(file);
        if (!source.exists()) {
            System.out.println("Sorry File does not Exists!");
            return;
        }
        if (isDocx(source.getName())) {
            poiDocxToHtml(file, path);
        } else {
            poiDocToHtml(file, path);
        }
    }

    /**
     * Converts a {@code .docx} document to {@code <path>/<base name>.html}.
     *
     * <p>Pictures are extracted below {@code path} and referenced from the HTML with
     * URIs relative to {@code ./}; the HTML is therefore written into {@code path} as
     * well (it used to land in the working directory, which broke the picture links).
     * If {@code file} does not exist a message is printed and nothing is written.
     *
     * @param file path of the {@code .docx} document to convert
     * @param path directory receiving the HTML file and the extracted pictures
     * @throws IOException if the document cannot be read or the output cannot be written
     */
    public static void poiDocxToHtml(String file, String path) throws IOException {
        File source = new File(file);
        if (!source.exists()) {
            System.out.println("文件不存在!");
            return;
        }

        // Load the whole document first so the input stream can be closed right away
        // and no empty HTML file is left behind when the document is unreadable.
        XWPFDocument document;
        try (InputStream in = new FileInputStream(source)) {
            document = new XWPFDocument(in);
        }

        File outputDir = new File(path);
        ensureDirectory(outputDir);

        // URI prefix used for pictures inside the generated HTML.
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("./"));
        // Directory below which the pictures are saved.
        options.setExtractor(new FileImageExtractor(outputDir));

        File htmlFile = new File(outputDir, baseName(source.getName()) + ".html");
        try (OutputStream out = new FileOutputStream(htmlFile)) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }

    /**
     * Converts a legacy {@code .doc} document to {@code <path>/1.html}.
     *
     * <p>Pictures are saved to {@code <path>/image/} and linked from the HTML as
     * {@code image/<name>}, so links and files agree (previously the files were written
     * directly into {@code path} while the HTML pointed at {@code image\<name>}).
     * The generated HTML is also printed to standard output.
     *
     * @param file path of the {@code .doc} document to convert
     * @param path directory receiving {@code 1.html} and the {@code image} folder
     * @throws IOException if the document cannot be read or the output cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if the HTML DOM cannot be serialized
     */
    public static void poiDocToHtml(String file, String path)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        try (InputStream input = new FileInputStream(file)) {
            wordDocument = new HWPFDocument(input);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                                      String suggestedName, float widthInches,
                                      float heightInches) {
                // Link written into the HTML; forward slash because this is a URL.
                return IMAGE_DIR + "/" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Save every picture of the document where the links above point to.
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null && !pics.isEmpty()) {
            File imageDir = new File(path, IMAGE_DIR);
            ensureDirectory(imageDir);
            for (Picture pic : pics) {
                File target = new File(imageDir, pic.suggestFullFileName());
                try (OutputStream out = new FileOutputStream(target)) {
                    pic.writeImageContent(out);
                } catch (FileNotFoundException e) {
                    // Best effort, as before: one unwritable picture must not abort the
                    // conversion, but say which file was affected.
                    System.err.println("Cannot write picture " + target + ": " + e);
                }
            }
        }

        // Serialize the HTML DOM and write it to the output directory.
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

        String content = outStream.toString("UTF-8");
        System.out.println(content);
        FileUtils.writeStringToFile(new File(path, "1.html"), content, "utf-8");
    }

    /** Returns whether {@code name} ends with {@code .docx}, ignoring case. */
    private static boolean isDocx(String name) {
        String ext = ".docx";
        // A negative offset (name shorter than the extension) simply yields false.
        return name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length());
    }

    /** Returns {@code name} without its last extension; names without one are returned as is. */
    private static String baseName(String name) {
        int dot = name.lastIndexOf('.');
        return dot > 0 ? name.substring(0, dot) : name;
    }

    /** Creates {@code dir} (and missing parents) unless it already exists. */
    private static void ensureDirectory(File dir) throws IOException {
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory: " + dir);
        }
    }
}



阅读更多

没有更多推荐了,返回首页