word2Html

package com.zxs.common;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
/**
 * 
 * @author 
* */ public class Word2Html { private static String outPictureDir = ""; private static final String ENCODING = "GB2312"; public static void main(String argv[]) { try { doc2Html("E://test//2.doc", "E://test//2.html"); } catch (Exception e) { e.printStackTrace(); } } private static void initOutDir(String outPutPath) { File file = new File(outPutPath); File outdir = file.getParentFile(); if(!outdir.exists()){ outdir.mkdirs(); } String outFileName = file.getName(); File pictureDir = new File(outdir, outFileName.substring(0, outFileName.lastIndexOf("."))); if(!pictureDir.exists()){ pictureDir.mkdirs(); } outPictureDir = pictureDir.getPath(); } /** * doc转换为html * * @param fileName * @param outPutFile * @throws TransformerException * @throws IOException * @throws ParserConfigurationException */ public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException { long startTime = System.currentTimeMillis(); initOutDir(outPutFile); HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance() .newDocumentBuilder().newDocument()); wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { String path = new File(outPictureDir).getName() + "//"+suggestedName; try { FileOutputStream file = new FileOutputStream(outPictureDir +"//"+ suggestedName); file.write(content); file.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return path; } }); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); writeFile(new String(out.toByteArray()).replaceAll("<span.*>\\s*TOC\\s*.*</span>", ""), outPutFile); System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms."); } /** * 写文件 * * @param content * @param path */ public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING)); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } }

 

转载于:https://www.cnblogs.com/zhangxuesong/p/5787751.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值