POI 实现word(doc/docx)与excel(xls/xlsx)浏览器预览
一、环境准备
1.jdk:1.8
2.maven:3.6
3.springboot:2.2.2
二、MAVEN主要依赖
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
三、具体实现
1.docToHtml(doc格式)
/**
 * Demo endpoint: converts the hard-coded legacy {@code .doc} file to HTML and
 * writes it to the HTTP response so the browser can preview it.
 *
 * @param response servlet response that receives the generated HTML
 */
@RequestMapping("/wordToHtml")
public void wordToHtml(HttpServletResponse response) {
    final String path = "C:\\usr\\local\\";
    final String file = "5页.doc";
    // try-with-resources guarantees the file handle is released even when the
    // conversion fails (the stream was previously never closed).
    try (InputStream input = new FileInputStream(path + file)) {
        docToHtml(input, response);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the client instead of answering 200 with an empty body.
        if (!response.isCommitted()) {
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
/**
 * Converts a binary Word ({@code .doc}) document to HTML and sends the result
 * to the client as {@code text/html} encoded in UTF-8.
 *
 * <p>The HTML is rendered into memory first, so the response is only touched
 * once the conversion has succeeded.
 *
 * @param input    stream positioned at the start of the {@code .doc} file; the caller closes it
 * @param response servlet response that receives the HTML
 * @throws Exception if the document cannot be read, converted or written
 */
public void docToHtml(InputStream input, HttpServletResponse response) throws Exception {
    final ByteArrayOutputStream html = new ByteArrayOutputStream();

    // The loaded document is closeable; release it as soon as the DOM is serialized.
    try (HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(input)) {
        WordToHtmlConverter converter = new ImageConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        converter.processDocument(wordDocument);

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(html));
    }

    // Clear anything buffered so far, then declare the content type before
    // the output stream is obtained.
    response.reset();
    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    try (OutputStream toClient = new BufferedOutputStream(response.getOutputStream())) {
        html.writeTo(toClient);
        toClient.flush();
    }
}
/**
 * Image handling: a {@link WordToHtmlConverter} that embeds every picture of
 * the document directly in the generated HTML as a {@code data:} URI, so the
 * preview needs no separately served image files.
 */
public class ImageConverter extends WordToHtmlConverter {

    public ImageConverter(Document document) {
        super(document);
    }

    /**
     * Appends an {@code <img>} element whose {@code src} is the Base64-encoded picture.
     *
     * @param currentBlock element that receives the {@code <img>} child
     * @param inlined      not used by this implementation
     * @param picture      picture taken from the Word document
     */
    @Override
    protected void processImageWithoutPicturesManager(Element currentBlock, boolean inlined, Picture picture) {
        Element imgNode = currentBlock.getOwnerDocument().createElement("img");
        // Basic encoder on purpose: the MIME encoder inserts a CRLF every 76
        // characters, which does not belong inside a data URI / HTML attribute.
        String payload = Base64.getEncoder().encodeToString(picture.getRawContent());
        imgNode.setAttribute("src", "data:" + picture.getMimeType() + ";base64," + payload);
        currentBlock.appendChild(imgNode);
    }
}
2.docxToHtml(docx格式)
/**
 * Demo endpoint: converts the hard-coded {@code .docx} file to HTML and
 * writes it to the HTTP response so the browser can preview it.
 *
 * @param response servlet response that receives the generated HTML
 */
@RequestMapping("/wordToHtml")
public void wordToHtml(HttpServletResponse response) {
    final String path = "C:\\usr\\local\\";
    final String file = "3.docx";
    // try-with-resources guarantees the file handle is released even when the
    // conversion fails (the stream was previously never closed).
    try (InputStream input = new FileInputStream(path + file)) {
        docxToHtml(input, response);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the client instead of answering 200 with an empty body.
        if (!response.isCommitted()) {
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
/**
 * Converts an OOXML Word ({@code .docx}) document to XHTML and sends the
 * result to the client as {@code text/html} encoded in UTF-8.
 *
 * <p>The markup is rendered into memory first, so the response is only
 * touched once the conversion has succeeded.
 *
 * @param inputStream stream positioned at the start of the {@code .docx} file; the caller closes it
 * @param response    servlet response that receives the HTML
 * @throws IOException if the document cannot be read, converted or written
 */
public void docxToHtml(InputStream inputStream, HttpServletResponse response) throws IOException {
    final ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();

    // Close the document (and the package behind it) once the HTML is rendered.
    try (XWPFDocument docxDocument = new XWPFDocument(inputStream)) {
        XHTMLOptions options = XHTMLOptions.create();
        // Embed pictures as Base64 so no image files have to be served separately.
        options.setImageManager(new Base64EmbedImgManager());
        // Convert to HTML.
        XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);
    }

    // Clear anything buffered so far, then declare the content type before
    // the output stream is obtained.
    response.reset();
    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    try (OutputStream toClient = new BufferedOutputStream(response.getOutputStream())) {
        htmlStream.writeTo(toClient);
        toClient.flush();
    }
}
3、xls格式
/**
 * Renders a legacy {@code .xls} workbook stored in the fixed download
 * directory as an HTML page.
 *
 * <p>Pictures contained in the workbook are additionally written next to the
 * workbook as {@code <fileUrl>_image<N>.<ext>}.
 *
 * @param fileUrl  plain file name (no directory part) of the workbook inside the download directory
 * @param response servlet response that receives the generated HTML
 * @throws Exception if the workbook cannot be read, converted or written
 */
@RequestMapping("/xlsTest")
public void xlsTest(String fileUrl, HttpServletResponse response) throws Exception {
    final String path = "D:\\apache-tomcat-8\\apache-tomcat-8.5.77\\webapps\\download\\file\\incorrupt\\";

    // fileUrl comes straight from the request: accept a bare file name only,
    // so that "..\\..\\something" cannot escape the download directory.
    if (fileUrl == null || fileUrl.isEmpty()
            || fileUrl.indexOf('/') >= 0 || fileUrl.indexOf('\\') >= 0 || fileUrl.indexOf(':') >= 0) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "invalid file name");
        return;
    }

    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    try (InputStream input = new FileInputStream(path + fileUrl);
         HSSFWorkbook excelBook = new HSSFWorkbook(input)) {
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        excelToHtmlConverter.processWorkbook(excelBook);

        // getAllPictures() yields HSSFPictureData; casting the elements to the
        // HWPF (Word) Picture type fails with ClassCastException as soon as the
        // workbook contains an image. The type is fully qualified because the
        // import list of this snippet is not shown.
        int pictureNo = 0;
        for (org.apache.poi.hssf.usermodel.HSSFPictureData pic : excelBook.getAllPictures()) {
            pictureNo++;
            String pictureName = fileUrl + "_image" + pictureNo + "." + pic.suggestFileExtension();
            try (OutputStream pictureOut = new FileOutputStream(path + pictureName)) {
                pictureOut.write(pic.getData());
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be stored must not break the preview.
                e.printStackTrace();
            }
        }

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(excelToHtmlConverter.getDocument()), new StreamResult(outStream));
    }

    response.reset();
    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    try (OutputStream toClient = new BufferedOutputStream(response.getOutputStream())) {
        outStream.writeTo(toClient);
        toClient.flush();
    }
}
4、xlsx格式
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;
public class TestDemo {
final static String path = "D:\\apache-tomcat-8\\apache-tomcat-8.5.77\\webapps\\download\\file\\incorrupt\\";
final static String file = "d1f028f4-8b80-4ff0-9756-8d386f157306_工作簿2.xlsx";
// private static final String EXCEL_XLS = "xls";
// private static final String EXCEL_XLSX = "xlsx";
public static void main(String[] args) {
try{
InputStream input = new FileInputStream(path +"/"+ file);
HSSFWorkbook excelBook = new HSSFWorkbook();
// //判断Excel文件将07+版本转换为03版本
// if(file.endsWith(EXCEL_XLS)){ //Excel 2003
// excelBook = new HSSFWorkbook(input);
// }
// else if(file.endsWith(EXCEL_XLSX)){ // Excel 2007/2010
Transform xls = new Transform();
XSSFWorkbook workbookOld = new XSSFWorkbook(input);
xls.transformXSSF(workbookOld, excelBook);
//}
ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );
excelToHtmlConverter.processWorkbook(excelBook);
List pics = excelBook.getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get (i);
try {
pic.writeImageContent (new FileOutputStream(path + pic.suggestFullFileName() ) );
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument =excelToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource (htmlDocument);
StreamResult streamResult = new StreamResult (outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty (OutputKeys.INDENT, "yes");
serializer.setOutputProperty (OutputKeys.METHOD, "html");
serializer.transform (domSource, streamResult);
outStream.close();
//Excel转换成Html
String content = new String(outStream.toByteArray());
System.out.println(content);
}
catch(Exception e) {
e.printStackTrace();
}
}
}
xlsx格式转xls格式的工具类
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormat;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.*;
import java.util.HashMap;
/**
 * Copies the content of an {@code .xlsx} workbook ({@link XSSFWorkbook}) into
 * an {@code .xls} workbook ({@link HSSFWorkbook}): sheets, rows, cells, cell
 * styles, fonts, comments, column widths and merged regions.
 *
 * <p>An instance keeps per-run state (style cache, widest row) and is
 * therefore not meant to be shared between threads.
 */
public class Transform {
    /** Number of columns of the widest row seen in the sheet currently being copied. */
    private int lastColumn = 0;
    /** Already converted styles of the current target workbook, keyed by the hash code of the source style. */
    private final HashMap<Integer, HSSFCellStyle> styleMap = new HashMap<>();

    /**
     * Copies every sheet of {@code workbookOld} into {@code workbookNew}.
     *
     * @param workbookOld source workbook (xlsx)
     * @param workbookNew target workbook (xls); one sheet is created per source sheet
     */
    public void transformXSSF(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew) {
        // Cached styles belong to one target workbook; drop leftovers from a
        // previous run so they are never applied to a different workbook.
        this.styleMap.clear();
        workbookNew.setMissingCellPolicy(workbookOld.getMissingCellPolicy());
        for (int i = 0; i < workbookOld.getNumberOfSheets(); i++) {
            XSSFSheet sheetOld = workbookOld.getSheetAt(i);
            HSSFSheet sheetNew = workbookNew.createSheet(sheetOld.getSheetName());
            this.transform(workbookOld, workbookNew, sheetOld, sheetNew);
        }
    }

    /** Copies the sheet settings, rows, column widths and merged regions of one sheet. */
    private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew,
            XSSFSheet sheetOld, HSSFSheet sheetNew) {
        // Column count is tracked per sheet; without the reset a wide sheet
        // would leak its column count into every following sheet.
        this.lastColumn = 0;

        sheetNew.setDisplayFormulas(sheetOld.isDisplayFormulas());
        sheetNew.setDisplayGridlines(sheetOld.isDisplayGridlines());
        sheetNew.setDisplayGuts(sheetOld.getDisplayGuts());
        sheetNew.setDisplayRowColHeadings(sheetOld.isDisplayRowColHeadings());
        sheetNew.setDisplayZeros(sheetOld.isDisplayZeros());
        sheetNew.setFitToPage(sheetOld.getFitToPage());
        sheetNew.setHorizontallyCenter(sheetOld.getHorizontallyCenter());
        sheetNew.setMargin(Sheet.BottomMargin, sheetOld.getMargin(Sheet.BottomMargin));
        sheetNew.setMargin(Sheet.FooterMargin, sheetOld.getMargin(Sheet.FooterMargin));
        sheetNew.setMargin(Sheet.HeaderMargin, sheetOld.getMargin(Sheet.HeaderMargin));
        sheetNew.setMargin(Sheet.LeftMargin, sheetOld.getMargin(Sheet.LeftMargin));
        sheetNew.setMargin(Sheet.RightMargin, sheetOld.getMargin(Sheet.RightMargin));
        sheetNew.setMargin(Sheet.TopMargin, sheetOld.getMargin(Sheet.TopMargin));
        // These four must be read from the SOURCE sheet; reading them from
        // sheetNew (as before) just re-assigned the target's own defaults.
        sheetNew.setPrintGridlines(sheetOld.isPrintGridlines());
        sheetNew.setRightToLeft(sheetOld.isRightToLeft());
        sheetNew.setRowSumsBelow(sheetOld.getRowSumsBelow());
        sheetNew.setRowSumsRight(sheetOld.getRowSumsRight());
        sheetNew.setVerticallyCenter(sheetOld.getVerticallyCenter());

        for (Row row : sheetOld) {
            HSSFRow rowNew = sheetNew.createRow(row.getRowNum());
            this.transform(workbookOld, workbookNew, (XSSFRow) row, rowNew);
        }
        for (int i = 0; i < this.lastColumn; i++) {
            sheetNew.setColumnWidth(i, sheetOld.getColumnWidth(i));
            sheetNew.setColumnHidden(i, sheetOld.isColumnHidden(i));
        }
        for (int i = 0; i < sheetOld.getNumMergedRegions(); i++) {
            CellRangeAddress merged = sheetOld.getMergedRegion(i);
            sheetNew.addMergedRegion(merged);
        }
    }

    /** Copies the height and all cells of one row and records how wide the row is. */
    private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew,
            XSSFRow rowOld, HSSFRow rowNew) {
        rowNew.setHeight(rowOld.getHeight());
        for (Cell cell : rowOld) {
            // Created blank; the value setters used by the cell copy switch the
            // cell to the matching type.
            HSSFCell cellNew = rowNew.createCell(cell.getColumnIndex());
            this.transform(workbookOld, workbookNew, (XSSFCell) cell, cellNew);
        }
        this.lastColumn = Math.max(this.lastColumn, rowOld.getLastCellNum());
    }

    /** Copies comment, style and value of one cell. */
    private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew,
            XSSFCell cellOld, HSSFCell cellNew) {
        this.copyComment(cellOld, cellNew);

        Integer hash = cellOld.getCellStyle().hashCode();
        if (!this.styleMap.containsKey(hash)) {
            this.transform(workbookOld, workbookNew, hash,
                    cellOld.getCellStyle(),
                    (HSSFCellStyle) workbookNew.createCellStyle());
        }
        cellNew.setCellStyle(this.styleMap.get(hash));

        switch (cellOld.getCellType()) {
            case BLANK:
                break;
            case BOOLEAN:
                cellNew.setCellValue(cellOld.getBooleanCellValue());
                break;
            case ERROR:
                // setCellValue(byte) would resolve to the numeric overload and
                // turn the error code into a plain number.
                cellNew.setCellErrorValue(cellOld.getErrorCellValue());
                break;
            case FORMULA:
                this.copyFormula(cellOld, cellNew);
                break;
            case NUMERIC:
                cellNew.setCellValue(cellOld.getNumericCellValue());
                break;
            case STRING:
                cellNew.setCellValue(cellOld.getStringCellValue());
                break;
            default:
                System.out.println("transform: Unbekannter Zellentyp "
                        + cellOld.getCellType());
        }
    }

    /**
     * Copies a formula cell: the formula itself where the binary format can
     * express it, and always the cached result so that renderers which do not
     * evaluate formulas still show the value.
     */
    private void copyFormula(XSSFCell cellOld, HSSFCell cellNew) {
        try {
            cellNew.setCellFormula(cellOld.getCellFormula());
        } catch (RuntimeException unsupported) {
            // NOTE(review): assumed to be a formula the HSSF parser rejects
            // (e.g. references outside the .xls limits). Deliberately ignored:
            // the cell then simply carries the cached result as a plain value.
        }
        switch (cellOld.getCachedFormulaResultType()) {
            case BOOLEAN:
                cellNew.setCellValue(cellOld.getBooleanCellValue());
                break;
            case ERROR:
                cellNew.setCellErrorValue(cellOld.getErrorCellValue());
                break;
            case NUMERIC:
                cellNew.setCellValue(cellOld.getNumericCellValue());
                break;
            case STRING:
                cellNew.setCellValue(cellOld.getStringCellValue());
                break;
            default:
                break;
        }
    }

    /**
     * Re-creates the comment of {@code cellOld} on {@code cellNew}. The source
     * comment object is an XSSF comment and cannot be attached to an HSSF cell
     * directly, so a new HSSF comment with the same text, author and
     * visibility is built instead.
     */
    private void copyComment(XSSFCell cellOld, HSSFCell cellNew) {
        XSSFComment commentOld = cellOld.getCellComment();
        if (commentOld == null) {
            return;
        }
        int col = cellNew.getColumnIndex();
        int row = cellNew.getRowIndex();
        // Comment box spans a few cells right/below the cell, clamped to the
        // .xls grid (last column index 255, last row index 65535).
        HSSFClientAnchor anchor = new HSSFClientAnchor(0, 0, 0, 0,
                (short) col, row,
                (short) Math.min(col + 2, 255), Math.min(row + 3, 65535));
        HSSFPatriarch drawing = cellNew.getSheet().createDrawingPatriarch();
        HSSFComment commentNew = drawing.createCellComment(anchor);

        XSSFRichTextString text = commentOld.getString();
        commentNew.setString(new HSSFRichTextString(text == null ? "" : text.getString()));
        if (commentOld.getAuthor() != null) {
            commentNew.setAuthor(commentOld.getAuthor());
        }
        commentNew.setVisible(commentOld.isVisible());
        cellNew.setCellComment(commentNew);
    }

    /** Copies the style attributes into {@code styleNew} and caches it under {@code hash}. */
    private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew,
            Integer hash, XSSFCellStyle styleOld, HSSFCellStyle styleNew) {
        styleNew.setAlignment(styleOld.getAlignment());
        styleNew.setBorderBottom(styleOld.getBorderBottom());
        styleNew.setBorderLeft(styleOld.getBorderLeft());
        styleNew.setBorderRight(styleOld.getBorderRight());
        styleNew.setBorderTop(styleOld.getBorderTop());
        // NOTE(review): the data format is intentionally NOT copied here; the call
        // to the data-format helper below was already disabled in the original
        // and the reason is not visible in this file - confirm before enabling.
        styleNew.setFillBackgroundColor(styleOld.getFillBackgroundColor());
        styleNew.setFillForegroundColor(styleOld.getFillForegroundColor());
        styleNew.setFillPattern(styleOld.getFillPattern());
        styleNew.setFont(this.transform(workbookNew, styleOld.getFont()));
        styleNew.setHidden(styleOld.getHidden());
        styleNew.setIndention(styleOld.getIndention());
        styleNew.setLocked(styleOld.getLocked());
        styleNew.setVerticalAlignment(styleOld.getVerticalAlignment());
        styleNew.setWrapText(styleOld.getWrapText());
        this.styleMap.put(hash, styleNew);
    }

    /** Maps a data-format index of the source workbook to the matching index in the target workbook. */
    private short transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew,
            short index) {
        DataFormat formatOld = workbookOld.createDataFormat();
        DataFormat formatNew = workbookNew.createDataFormat();
        return formatNew.getFormat(formatOld.getFormat(index));
    }

    /** Creates a font in the target workbook that mirrors {@code fontOld}. */
    private HSSFFont transform(HSSFWorkbook workbookNew, XSSFFont fontOld) {
        HSSFFont fontNew = workbookNew.createFont();
        // setBoldweight()/getBoldweight() no longer exist in POI 4; setBold is
        // the replacement. Without it every bold cell lost its weight.
        fontNew.setBold(fontOld.getBold());
        fontNew.setCharSet(fontOld.getCharSet());
        fontNew.setColor(fontOld.getColor());
        fontNew.setFontName(fontOld.getFontName());
        fontNew.setFontHeight(fontOld.getFontHeight());
        fontNew.setItalic(fontOld.getItalic());
        fontNew.setStrikeout(fontOld.getStrikeout());
        fontNew.setTypeOffset(fontOld.getTypeOffset());
        fontNew.setUnderline(fontOld.getUnderline());
        return fontNew;
    }
}
四、总结
1.主要几个maven包的依赖版本需要一致
2.文档必须是标准的word文档。举个例子,从boss直聘上下载下来的简历不能预览,因为文件内容实际是html格式,转换时会出现异常:
Document is really HTML File。解决办法是用office打开后,把文件另存为标准word格式
3.不能直接修改文件后缀名预览,虽然office能打开,但是不是标准word格式,需要另存为你想要的格式(doc,docx),否则会出现异常java.lang.IllegalArgumentException: The document is really a OOXML file
4.尝试过spire.doc,用的是免费版,文档超过三页不能预览,这一方面官网给出了解释,最终选定poi这个方案
版权声明:本文为linanqi_java原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/linanqi_java/article/details/109291562