package com.zxs.common;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
/**
*
* @author
*
*/
public class Word2Html {
private static String outPictureDir = "";
private static final String ENCODING = "GB2312";
public static void main(String argv[]) {
try {
doc2Html("E://test//2.doc", "E://test//2.html");
} catch (Exception e) {
e.printStackTrace();
}
}
private static void initOutDir(String outPutPath) {
File file = new File(outPutPath);
File outdir = file.getParentFile();
if(!outdir.exists()){
outdir.mkdirs();
}
String outFileName = file.getName();
File pictureDir = new File(outdir, outFileName.substring(0, outFileName.lastIndexOf(".")));
if(!pictureDir.exists()){
pictureDir.mkdirs();
}
outPictureDir = pictureDir.getPath();
}
/**
* doc转换为html
*
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
*/
public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException,
ParserConfigurationException {
long startTime = System.currentTimeMillis();
initOutDir(outPutFile);
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance()
.newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches,
float heightInches) {
String path = new File(outPictureDir).getName() + "//"+suggestedName;
try {
FileOutputStream file = new FileOutputStream(outPictureDir +"//"+ suggestedName);
file.write(content);
file.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return path;
}
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()).replaceAll("<span.*>\\s*TOC\\s*.*</span>", ""), outPutFile);
System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");
}
/**
* 写文件
*
* @param content
* @param path
*/
public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null;
try {
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING));
bw.write(content);
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fos != null)
fos.close();
} catch (IOException ie) {
}
}
}
}