html在线浏览本地word,word导出html实现在线预览

最新推荐文章于 2024-07-01 11:05:40 发布

陈普照

最新推荐文章于 2024-07-01 11:05:40 发布

阅读量1.1k

点赞数

文章标签： html在线浏览本地word

需要的 Maven 依赖如下（已通过编译验证；各依赖的版本必须相互匹配）：

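<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.5</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-io</artifactId>
    <version>1.3.2</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>

<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>

<!-- 注意：poi 与 poi-scratchpad 使用 3.17，而此处 poi-ooxml 为 3.14；POI 各模块通常应使用同一版本，请按实际环境确认。 -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>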

package com.zyhao.openec.excel.utils;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStreamWriter;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;

import org.apache.poi.xwpf.converter.core.IURIResolver;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

public class WordToHtml {

public static void main(String[] args) {

try {

wordToHtml("E:\me\2.docx", "E:\me\", "123.html");

// wordToHtml("E:\me\2.doc", "E:\me\", "12.html");

} catch (Exception e) {

e.printStackTrace();

}

public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws Exception {

convert2Html(wordPath, htmlPath, newFilename);

}

public static void writeFile(String content, String path) throws Exception {

FileOutputStream fos = null;

BufferedWriter bw = null;

try {

File file = new File(path);

fos = new FileOutputStream(file);

bw = new BufferedWriter(new OutputStreamWriter(fos));

bw.write(content);

} catch (FileNotFoundException fnfe) {

fnfe.printStackTrace();

} catch (IOException ioe) {

ioe.printStackTrace();

} finally {

try {

if (bw != null)

bw.close();

if (fos != null)

fos.close();

} catch (IOException ie) {

}

/**

* 将word转换成html

* 支持 .doc and .docx

* @param fileName word文件名

* @param outPutFilePath html存储路径

* @param newFileName html名

* @throws Exception

public static void convert2Html(String fileName, String outPutFilePath,String newFileName)

throws Exception {

String substring = fileName.substring(fileName.lastIndexOf(".")+1);

ByteArrayOutputStream out = new ByteArrayOutputStream();

/**

* word2007和word2003的构建方式不同，

* 前者的构建方式是xml，后者的构建方式是dom树。

* 文件的后缀也不同，前者后缀为.docx，后者后缀为.doc

* 相应的，apache.poi提供了不同的实现类。

if("docx".equals(substring)){

// writeFile(new String("

对不起，.docx格式的word文档，暂时不能生成预览".getBytes("utf-8")), outPutFilePath+newFileName);

//step 1 : load DOCX into XWPFDocument

InputStream inputStream = new FileInputStream(new File(fileName));

XWPFDocument document = new XWPFDocument(inputStream);

//step 2 : prepare XHTML options

final String imageUrl = "";

XHTMLOptions options = XHTMLOptions.create();

options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl)));

options.setIgnoreStylesIfUnused(false);

options.setFragment(true);

options.URIResolver(new IURIResolver() {

// @Override 重写的方法，加上这个报错，你看看是啥问题

public String resolve(String uri) {

return imageUrl + uri;

}

});

//step 3 : convert XWPFDocument to XHTML

XHTMLConverter.getInstance().convert(document, out, options);

}else{

HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());

wordToHtmlConverter.setPicturesManager( new PicturesManager()

{

public String savePicture( byte[] content,

PictureType pictureType, String suggestedName,

float widthInches, float heightInches )

{

return suggestedName;

}

} );

wordToHtmlConverter.processDocument(wordDocument);

//save pictures

List pics=wordDocument.getPicturesTable().getAllPictures();

if(pics!=null&&!pics.isEmpty()){

for(int i=0;i

Picture pic = (Picture)pics.get(i);

System.out.println();

try {

pic.writeImageContent(new FileOutputStream(outPutFilePath

+ pic.suggestFullFileName()));

} catch (FileNotFoundException e) {

e.printStackTrace();

}

Document htmlDocument = wordToHtmlConverter.getDocument();

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(out);

TransformerFactory tf = TransformerFactory.newInstance(); //这个应该是转换成xml的

Transformer serializer = tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

}

out.close();

writeFile(new String(out.toByteArray()), outPutFilePath+newFileName);

}

陈普照

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫