1,需求
在页面中预览 Word 文档,使用的是 POI 3.8。以下代码支持表格和图片,但不支持 HTML 页面分页;只支持 .doc 格式,不支持 .docx 格式。
2,word文档内容:
3,代码
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
/**
 * Converts a legacy binary Word document ({@code .doc}) into an HTML file using
 * Apache POI's HWPF {@link WordToHtmlConverter}.
 *
 * <p>Embedded pictures are written as separate files into the directory of the generated
 * HTML file, because the HTML references them by bare file name. {@code .docx} input is
 * not handled by this class.
 */
public class Word2Html {

    /** Encoding used to serialize the HTML, to decode the serialized bytes and to write the file. */
    private static final String HTML_ENCODING = "UTF-8";

    /**
     * Sample entry point: converts one hard-coded document and prints any failure.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        try {
            // Source document and destination HTML file.
            convert2Html("D:/test/1.doc", "D:/test/1.html");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code content} to the file at {@code path} encoded as UTF-8, replacing any
     * existing file.
     *
     * <p>I/O failures are printed to standard error rather than thrown.
     *
     * @param content text to write
     * @param path    destination file path
     */
    public static void writeFile(String content, String path) {
        // try-with-resources closes both streams even when write() or an earlier close() fails.
        try (FileOutputStream fos = new FileOutputStream(new File(path));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_ENCODING))) {
            bw.write(content);
        } catch (IOException ioe) {
            // Covers FileNotFoundException as well; this method reports failures without throwing.
            ioe.printStackTrace();
        }
    }

    /**
     * Converts the {@code .doc} file {@code fileName} to HTML and writes it to
     * {@code outPutFile}.
     *
     * <p>Every picture found in the document is saved under its suggested file name in the
     * directory that contains {@code outPutFile}, which is where the relative
     * {@code src} values returned by the pictures manager point to.
     *
     * @param fileName   path of the source {@code .doc} file
     * @param outPutFile path of the HTML file to create
     * @throws TransformerException         if the HTML DOM cannot be serialized
     * @throws IOException                  if the source cannot be read or a picture cannot be written
     * @throws ParserConfigurationException if no DOM document builder can be created
     */
    public static void convert2Html(String fileName, String outPutFile)
            throws TransformerException, IOException, ParserConfigurationException {
        HWPFDocument wordDocument;
        try (FileInputStream in = new FileInputStream(fileName)) {
            wordDocument = new HWPFDocument(in);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // The returned value becomes the picture reference in the HTML; the bare
                // name makes it relative to the HTML file.
                return "" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Save the pictures next to the HTML file so the relative references resolve.
        File pictureDir = new File(outPutFile).getAbsoluteFile().getParentFile();
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                File pictureFile = new File(pictureDir, pic.suggestFullFileName());
                try (FileOutputStream pictureOut = new FileOutputStream(pictureFile)) {
                    pic.writeImageContent(pictureOut);
                } catch (FileNotFoundException e) {
                    // A picture that cannot be created does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer used; the platform default would
        // corrupt non-ASCII text on systems whose default is not UTF-8.
        writeFile(new String(out.toByteArray(), HTML_ENCODING), outPutFile);
    }
}
4,转化之后
test文件夹下就会生成两个文件,
一个转换后的html文件,一个是png图片文件,
这两个文件的位置可以自行在代码中修改地址,指定位置存放
5,html效果
格式,图片,表格都能正常转化
下载 poi-bin-3.9-20121203.tar.gz 并解压,提取处理 Office 文档所依赖的 jar 包。
6,原文参考链接:poi完美word转html(表格、图片、样式)
分割线----------------------------------------------------------------------------------------------------------------------------------分割线
改进
7,现在想支持docx文件
下载插件:
<!-- Excel中表格POI插件 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<!-- poi插件 XSSFWorkbook和SXSSFWorkbook -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.xhtml -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
代码:
import java.io.*;
import org.apache.commons.io.FileUtils;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Converts Word {@code .docx} documents into XHTML files using Apache POI's
 * {@link XWPFDocument} together with the XDocReport {@link XHTMLConverter}.
 *
 * <p>All locations are resolved against the {@code user.dir} system property.
 */
public class Word2Html {

    /** File extension of the documents this class converts. */
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Batch entry point: converts every {@code .docx} file found exactly three directory
     * levels below {@code src/main/resources/word/} and writes the result, with an
     * {@code .html} extension, to {@code src/main/resources/static/static/html/}.
     *
     * <p>The first failing conversion aborts the batch and is printed.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        try {
            String projectDir = System.getProperty("user.dir");
            File wordRoot = new File(projectDir + "/src/main/resources/word/");
            String htmlDir = projectDir + "/src/main/resources/static/static/html/";
            for (File firstLevel : listChildren(wordRoot)) {
                for (File secondLevel : listChildren(firstLevel)) {
                    for (File candidate : listChildren(secondLevel)) {
                        String name = candidate.getName();
                        if (!candidate.isFile() || !hasDocxExtension(name)) {
                            // The converter only understands .docx files.
                            continue;
                        }
                        // Swap only the trailing extension; a "docx" elsewhere in the
                        // name must stay untouched.
                        String htmlName =
                                name.substring(0, name.length() - DOCX_EXTENSION.length()) + ".html";
                        convert2Html(candidate.toString(), htmlDir + htmlName);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns the entries of {@code dir}, or an empty array when it cannot be listed. */
    private static File[] listChildren(File dir) {
        File[] entries = dir.listFiles();
        return entries != null ? entries : new File[0];
    }

    /** Tells whether {@code name} ends with {@code .docx}, ignoring case. */
    private static boolean hasDocxExtension(String name) {
        int start = name.length() - DOCX_EXTENSION.length();
        return start >= 0
                && name.regionMatches(true, start, DOCX_EXTENSION, 0, DOCX_EXTENSION.length());
    }

    /**
     * Writes {@code content} to the file at {@code path} encoded as UTF-8, replacing any
     * existing file.
     *
     * <p>I/O failures are printed to standard error rather than thrown.
     *
     * @param content text to write
     * @param path    destination file path
     */
    public static void writeFile(String content, String path) {
        // try-with-resources closes both streams even when write() or an earlier close() fails.
        try (FileOutputStream fos = new FileOutputStream(new File(path));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"))) {
            bw.write(content);
        } catch (IOException ioe) {
            // Covers FileNotFoundException as well; this method reports failures without throwing.
            ioe.printStackTrace();
        }
    }

    /**
     * Converts the {@code .docx} file {@code fileName} to XHTML and writes it to
     * {@code outPutFile}, creating the output directory when it does not exist.
     *
     * <p>Pictures are extracted to
     * {@code src/main/resources/static/static/img/<document name>/} below {@code user.dir}
     * and referenced from the HTML through the URI prefix
     * {@code /static/img/<document name>/}. After conversion a UTF-8
     * {@code Content-Type} meta tag is inserted right after {@code <head>}.
     *
     * @param fileName   path of the source {@code .docx} file
     * @param outPutFile path of the HTML file to create
     * @throws Exception if the document cannot be read, converted or written
     */
    public static void convert2Html(String fileName, String outPutFile) throws Exception {
        System.out.println(fileName);

        // Picture location is derived from the document name without its extension.
        String filename = new File(fileName).getName();
        int dot = filename.lastIndexOf('.');
        String baseName = dot >= 0 ? filename.substring(0, dot) : filename;
        String pathname = "/static/img/" + baseName + "/";
        File imageFolder = new File(
                System.getProperty("user.dir") + "/src/main/resources/static/" + pathname);

        File outFile = new File(outPutFile);
        try (InputStream in = new FileInputStream(fileName)) {
            XWPFDocument wordDocument = new XWPFDocument(in);

            XHTMLOptions options = XHTMLOptions.create().indent(4);
            // Export the pictures to disk ...
            options.setExtractor(new FileImageExtractor(imageFolder));
            // ... and make the generated HTML reference them under the matching URI prefix.
            options.URIResolver(new BasicURIResolver(pathname));

            File outDir = outFile.getAbsoluteFile().getParentFile();
            if (outDir != null && !outDir.isDirectory() && !outDir.mkdirs()) {
                throw new IOException("Cannot create output directory " + outDir);
            }
            try (OutputStream outputStream = new FileOutputStream(outFile)) {
                XHTMLConverter.getInstance().convert(wordDocument, outputStream, options);
            }
        }

        // Declare the charset explicitly so the page is decoded as UTF-8 when viewed.
        String html = FileUtils.readFileToString(outFile, "UTF-8");
        html = html.replace("<head>", "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>");
        FileUtils.write(outFile, html, "UTF-8");
    }
}