package com.office;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Locale;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
/**
 * Converts office documents to HTML with Apache POI.
 *
 * <p>Only the legacy Word {@code .doc} format is converted. Every other
 * extension makes {@link #ConvertToHtml(String, String)} return {@code false}.
 */
public class OfficeConvert {

    /**
     * Returns the lower-cased extension of a file name, without the dot.
     *
     * @param f the file whose name is inspected; may be {@code null}
     * @return the text after the last {@code '.'} in lower case, or
     *         {@code null} when {@code f} is {@code null}, the name has no
     *         dot, starts with its only dot, or ends with a dot
     */
    public static String GetFileExt(File f) {
        if (f == null) {
            return null;
        }
        String name = f.getName();
        int dot = name.lastIndexOf('.');
        // dot > 0 rejects names like ".bashrc"; dot < length - 1 rejects "name.".
        if (dot > 0 && dot < name.length() - 1) {
            // Locale.ROOT keeps the result stable regardless of the default
            // locale (a Turkish default would turn "GIF" into "gıf").
            return name.substring(dot + 1).toLowerCase(Locale.ROOT);
        }
        return null;
    }

    /**
     * Converts the document at {@code input} to an HTML file at {@code output}.
     *
     * <p>Pictures embedded in the document are written into the directory
     * {@code output + ".img"}, and the HTML refers to them through the
     * relative path {@code <directory name>/<picture name>}.
     *
     * @param input  path of the source document
     * @param output path of the HTML file to create
     * @return {@code true} when a {@code .doc} file was converted;
     *         {@code false} when an argument is {@code null}, the input does
     *         not exist, its extension is not {@code doc}, or the conversion
     *         failed (the failure is printed to standard error)
     */
    public static boolean ConvertToHtml(final String input, final String output) {
        if (input == null || output == null) {
            return false;
        }
        File source = new File(input);
        if (!source.exists()) {
            return false;
        }
        // Constant-first comparison: GetFileExt returns null for names
        // without an extension. docx, ppt, pptx, xls and xlsx are not
        // handled and are reported as a failed conversion.
        if (!"doc".equals(GetFileExt(source))) {
            return false;
        }
        try {
            convertDocToHtml(input, output);
            return true;
        } catch (Exception e) {
            // Boundary of the boolean-result API: any parser, transformer,
            // I/O or POI runtime failure is reported and mapped to false.
            e.printStackTrace();
            return false;
        }
    }

    /** Reads a Word {@code .doc} file and serializes it as UTF-8 HTML. */
    private static void convertDocToHtml(final String input, final String output)
            throws Exception {
        try (FileInputStream in = new FileInputStream(input)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            Document htmlDom = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder().newDocument();
            WordToHtmlConverter converter = new WordToHtmlConverter(htmlDom);
            converter.setPicturesManager(
                    newPicturesManager(new File(output + ".img")));
            converter.processDocument(wordDocument);

            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            transformer.setOutputProperty(OutputKeys.METHOD, "html");

            // Closing the writer flushes any buffered characters and closes
            // the underlying file stream, even when transform() throws.
            try (FileOutputStream out = new FileOutputStream(output);
                    OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                transformer.transform(
                        new DOMSource(converter.getDocument()),
                        new StreamResult(writer));
            }
        }
    }

    /**
     * Creates {@code imageDir} if needed and returns a manager that stores
     * each picture in it under the name suggested by the converter.
     */
    private static PicturesManager newPicturesManager(final File imageDir) {
        final String imageDirName = imageDir.getName();
        if (!imageDir.isDirectory()) {
            imageDir.mkdirs();
        }
        return new PicturesManager() {
            @Override
            public String savePicture(byte[] content,
                    PictureType pictureType, String suggestedName,
                    float widthInches, float heightInches) {
                File target = new File(imageDir, suggestedName);
                try (FileOutputStream pictureOut = new FileOutputStream(target)) {
                    pictureOut.write(content);
                } catch (Exception e) {
                    // Best effort: a picture that cannot be saved must not
                    // abort the conversion of the document text.
                    e.printStackTrace();
                }
                return imageDirName + "/" + suggestedName;
            }
        };
    }
}
// POI: converting the Word DOC format to HTML
// (Most recently recommended article, published 2021-01-16 19:44:54)