POI 将 Excel 2003、Excel 2007、Word 2003、Word 2007 转换为 HTML

最新推荐文章于 2024-08-19 15:26:53 发布

chuizang4830

最新推荐文章于 2024-08-19 15:26:53 发布

阅读量125

点赞数

文章标签： json java 人工智能

原文链接：https://my.oschina.net/ysySuperman/blog/749100

版权

上一篇写的是用 POI 解析 PPT，这一篇是关于 Excel 和 Word 的。其实用 POI 解析 Excel 非常好用。本文参考了网上大神的代码，自己添加、修改了一些东西，都是写代码的苦命兄弟，拿出来共同参考，有意见请大家指正。遇到的问题是：如果用 JSON 把这些 HTML 代码返回给页面是不行的，因为 JSON 不支持 HTML 格式输出；折衷的办法是先用 encodeURI 编码，再用 decodeURI 解码，但是全篇解码会有问题，有些字符（如“=”、“;”等）无法完全解析。所以不太建议用 JSON；如果非用不可，最好手动解码（一听就知道是个很痛苦的事情），但还是会让 HTML 有瑕疵。

package com.ysy.officeRead.controller;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import java.io.OutputStreamWriter;
import java.io.StringWriter;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

public class OfficeBeRead {

   /**
    * Converts a Word 97-2003 ({@code .doc}) file to an HTML string.
    *
    * <p>Side effects: every picture embedded in the document is written into a directory
    * named after {@code url} without its extension, and the generated page is stored in
    * that same directory as {@code 1.html} (UTF-8, before the image paths are rewritten).
    * In the returned markup every {@code <img src="...">} value is prefixed, through
    * {@code replaceAllStr}, with {@code projectPath} minus its extension plus {@code "/"}.
    *
    * @param url         path of the {@code .doc} file on the local file system; must
    *                    contain a {@code '.'}
    * @param projectPath path of the same file as it should be referenced from the page;
    *                    only used to build the image prefix and must contain a {@code '.'}
    * @return the converted HTML, or an empty string when reading or converting the
    *         document fails (the stack trace is printed)
    */
   public String poiWord2003ToHtml(String url, String projectPath) {
       String pathString = url.substring(0, url.lastIndexOf("."));
       String proString2 = projectPath.substring(0, projectPath.lastIndexOf(".")) + "/";
       String content = "";

       InputStream inputStream = null;
       try {
           inputStream = new FileInputStream(url);
           HWPFDocument worDocument = new HWPFDocument(inputStream);

           WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                   DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
           wordToHtmlConverter.setPicturesManager(new PicturesManager() {

               public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                       float widthInches, float heightInches) {
                   // The pictures themselves are written below; here we only tell the
                   // converter which file name to put into the <img> tag.
                   return suggestedName;
               }
           });
           wordToHtmlConverter.processDocument(worDocument);

           // Extract the embedded pictures next to the generated page.
           List<?> pics = worDocument.getPicturesTable().getAllPictures();
           if (pics != null) {
               for (int i = 0; i < pics.size(); i++) {
                   Picture picture = (Picture) pics.get(i);

                   File pictureFile = new File(pathString, picture.suggestFullFileName());
                   File pictureDir = pictureFile.getParentFile();
                   if (pictureDir != null && !pictureDir.exists()) {
                       pictureDir.mkdirs();
                   }

                   // Close every picture stream; the original leaked one handle per image.
                   FileOutputStream pictureOut = new FileOutputStream(pictureFile);
                   try {
                       picture.writeImageContent(pictureOut);
                   } finally {
                       pictureOut.close();
                   }
               }
           }

           Document htmlDocument = wordToHtmlConverter.getDocument();
           ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

           Transformer serializer = TransformerFactory.newInstance().newTransformer();
           serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
           serializer.setOutputProperty(OutputKeys.INDENT, "yes");
           serializer.setOutputProperty(OutputKeys.METHOD, "html");
           serializer.transform(new DOMSource(htmlDocument), new StreamResult(outputStream));

           // Decode with the same charset the serializer was told to use; relying on the
           // platform default corrupts non-ASCII text on non-UTF-8 systems.
           content = outputStream.toString("utf-8");

           // Keep a copy of the page on disk, then rewrite the image paths for the caller.
           FileUtils.write(new File(pathString, "1.html"), content, "utf-8");

           content = replaceAllStr(content, proString2);

       } catch (Exception e) {
           // Best effort: the caller gets whatever was produced so far (normally "").
           e.printStackTrace();
       } finally {
           if (inputStream != null) {
               try {
                   inputStream.close();
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
       }

       return content;
   }

   /**
    * Converts a Word 2007+ ({@code .docx}) file to an HTML string.
    *
    * <p>Side effects: images are extracted by the converter into a directory named after
    * {@code url} without its extension, and the raw converter output is stored as
    * {@code <url without extension>1.html} (UTF-8). The returned markup additionally has
    * its HTML entities unescaped and every {@code <img src="...">} value prefixed, through
    * {@code replaceAllStr}, with {@code projectPath} minus its extension plus {@code "/"}.
    *
    * @param url         path of the uploaded {@code .docx} file on the local file system;
    *                    must contain a {@code '.'}
    * @param projectPath access path of the same file inside the project; only used to
    *                    build the image prefix and must contain a {@code '.'}
    * @return the converted HTML, or an empty string when reading or converting the
    *         document fails (the stack trace is printed)
    */
   public String poiWord2007ToHtml(String url,String projectPath){

       String imagePathString = url.substring(0, url.lastIndexOf("."));
       String targetFileNameString = imagePathString+"1.html";
       String proString2 = projectPath.substring(0, projectPath.lastIndexOf("."))+"/";

       String out = "";
       InputStream inputStream = null;
       try {
           inputStream = new FileInputStream(url);
           XWPFDocument document = new XWPFDocument(inputStream);

           XHTMLOptions options = XHTMLOptions.create();
           // Directory the extracted images are stored in.
           options.setExtractor(new FileImageExtractor(new File(imagePathString)));
           // Prefix the converter puts in front of image paths inside the HTML.
           options.URIResolver(new BasicURIResolver("/"));

           // Convert into memory. The original wrote through an OutputStreamWriter and
           // read the file back while that writer was still open and unflushed, using
           // available() and a single read(), so the result could be truncated and was
           // decoded with the platform charset.
           StringWriter htmlWriter = new StringWriter();
           XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
           xhtmlConverter.convert(document, htmlWriter, options);
           String html = htmlWriter.toString();

           // Keep a copy of the raw converter output on disk (parent directories are
           // created by FileUtils when missing).
           FileUtils.write(new File(targetFileNameString), html, "utf-8");

           // Turn numeric character references back into the characters they stand for,
           // so that e.g. Chinese text is returned as text instead of decimal codes.
           // NOTE(review): unescapeHtml also decodes &lt; / &gt; / &amp;, so literal
           // markup characters in the document text become real markup - confirm this
           // is acceptable for the documents being converted.
           out = StringEscapeUtils.unescapeHtml(html);

           out = replaceAllStr(out, proString2);

       } catch (Exception e) {
           // Best effort: the caller gets whatever was produced so far (normally "").
           e.printStackTrace();
       } finally {
           if (inputStream != null) {
               try {
                   inputStream.close();
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
       }

       return out;
   }

/*   public static void main(String[] args) {
       System.out.println(new OfficeBeRead().poiWord2003ToHtml());
   }*/

   /**
    * Prefixes the {@code src} value of every {@code <img src="...">} tag in
    * {@code content} with {@code imgurl}.
    *
    * <p>Used to point the image paths emitted by the converters at the directory the
    * extracted pictures are reachable under. Only the exact spelling
    * {@code <img src="} is recognised.
    *
    * @param content HTML to rewrite; {@code null} is treated as empty
    * @param imgurl  prefix to insert directly after {@code <img src="}; when
    *                {@code null} or empty the content is returned unchanged
    * @return the rewritten HTML; content without any image tag is returned as is
    */
   public String replaceAllStr(String content,String imgurl){
       if (content == null) {
           return "";
       }
       if (imgurl == null || imgurl.length() == 0) {
           return content;
       }

       // Literal (non-regex) replacement. The previous split-based version returned an
       // empty string whenever the content had no image tag and dropped a trailing marker.
       String imgMarker = "<img src=" + "\"";
       return content.replace(imgMarker, imgMarker + imgurl);
   }


   /**
    * Converts an Excel 97-2003 ({@code .xls}) workbook to an HTML string using POI's
    * {@code ExcelToHtmlConverter}. Pictures in the workbook are not converted.
    *
    * <p>Side effect: the generated page is also stored as
    * {@code <url without extension>1.html}, encoded as UTF-8.
    *
    * @param url         path of the {@code .xls} file on the local file system; must
    *                    contain a {@code '.'}
    * @param projectPath not used by this method; kept for signature parity with the
    *                    Word converters
    * @return the generated HTML, or an empty string when the file does not exist or the
    *         conversion fails (the stack trace is printed)
    */
   public String PoiExcel2003ToHtml(String url,String projectPath){

       File excelFile = new File(url);
       String imagePathString = url.substring(0, url.lastIndexOf("."));
       File htmlFile = new File(imagePathString+"1.html");
       String content = "";

       if (!excelFile.exists()) {
           return content;
       }

       InputStream iStream = null;
       try {
           iStream = new FileInputStream(excelFile);
           HSSFWorkbook workbook = new HSSFWorkbook(iStream);
           ExcelToHtmlConverter converter = new ExcelToHtmlConverter(DocumentBuilderFactory
                   .newInstance().newDocumentBuilder().newDocument());
           converter.processWorkbook(workbook);

           StringWriter writer = new StringWriter();
           Transformer serializer = TransformerFactory.newInstance().newTransformer();
           serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
           serializer.setOutputProperty(OutputKeys.INDENT, "yes");
           serializer.setOutputProperty(OutputKeys.METHOD, "html");
           serializer.transform(
                   new DOMSource(converter.getDocument()),
                   new StreamResult(writer));
           String html = writer.toString();

           // Store the page on disk (FileUtils creates missing parent directories).
           FileUtils.write(htmlFile, html, "UTF-8");

           // Return the serialized markup directly. The original read the file back with
           // available() plus a single read() and decoded the UTF-8 bytes with the
           // platform charset, which garbles non-ASCII cells on non-UTF-8 systems.
           content = html;

       } catch (Exception e) {
           // Best effort: on failure the caller receives an empty string.
           e.printStackTrace();
       } finally {
           if (iStream != null) {
               try {
                   iStream.close();
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
       }

       return content;
   }

   /**
    * Parses an Excel 2007+ ({@code .xlsx}) workbook with POI and renders it as a simple
    * HTML page: one {@code <h3>} heading plus one {@code <table>} per sheet.
    *
    * <p>Rows that do not exist in the sheet are skipped; missing cells inside an existing
    * row are rendered as empty {@code <td>} elements. Sheet names and cell text are
    * HTML-escaped before they are inserted into the page.
    *
    * @param url         path of the {@code .xlsx} file on the local file system
    * @param projectPath not used by this method; kept for signature parity with the
    *                    other converters
    * @return the generated HTML page; when parsing fails, whatever markup had been
    *         produced up to that point (possibly empty) is returned and the stack trace
    *         is printed
    */
   public String PoiExcel2007ToHtml(String url,String projectPath){
       StringBuilder content = new StringBuilder();
       InputStream inputStream = null;
       try {
           // Read through a stream that is closed in the finally block; opening the
           // workbook by path kept the underlying file open because it was never closed.
           inputStream = new FileInputStream(url);
           XSSFWorkbook xwb = new XSSFWorkbook(inputStream);

           content.append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><title>Parse Excel With POI</title></head><body>");

           // One heading and one table per sheet.
           for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {
               XSSFSheet xSheet = xwb.getSheetAt(numSheet);
               if (xSheet == null) {
                   continue;
               }

               content.append("<h3 valign='middle' align='center'>")
                       .append(escapeExcel2007Html(xSheet.getSheetName()))
                       .append("</h3>");
               content.append("<table valign='middle' align='center' border=1 cellspacing=0 cellpadding=1>");

               for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) {
                   XSSFRow xRow = xSheet.getRow(rowNum);
                   if (xRow == null) {
                       continue;
                   }
                   content.append("<tr align='middle'>");

                   // getLastCellNum() is already "last index + 1" (and -1 for a row
                   // without cells), so the bound is exclusive; "<=" produced one
                   // spurious empty column per row.
                   int cellCount = xRow.getLastCellNum();
                   for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                       String text = excel2007CellText(xRow.getCell(cellNum));
                       content.append("<td>").append(" ")
                               .append(escapeExcel2007Html(text))
                               .append("</td>");
                   }
                   content.append("</tr>");
               }
               content.append("</table>");
           }
           content.append("</body></html>");

       } catch (Exception e) {
           e.printStackTrace();
           System.out.println("POI解析Excel2007错误");
       } finally {
           if (inputStream != null) {
               try {
                   inputStream.close();
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
       }
       return content.toString();
   }

   /**
    * Returns the display text of one cell; a missing cell yields an empty string.
    *
    * <p>Formula cells are rendered through their cached result type, so a formula with a
    * numeric or boolean result no longer makes {@code getStringCellValue()} throw and
    * abort the whole conversion. Blank, error and any other cells fall back to
    * {@code toString()}.
    */
   private String excel2007CellText(XSSFCell cell) {
       if (cell == null) {
           return "";
       }
       int cellType = cell.getCellType();
       if (cellType == XSSFCell.CELL_TYPE_FORMULA) {
           cellType = cell.getCachedFormulaResultType();
       }
       if (cellType == XSSFCell.CELL_TYPE_BOOLEAN) {
           return String.valueOf(cell.getBooleanCellValue());
       }
       if (cellType == XSSFCell.CELL_TYPE_NUMERIC) {
           return this.doubleToString(cell.getNumericCellValue());
       }
       if (cellType == XSSFCell.CELL_TYPE_STRING) {
           return cell.getStringCellValue();
       }
       return cell.toString();
   }

   /**
    * Escapes the characters that are significant in HTML markup
    * ({@code & < > "}); all other characters, including non-ASCII text, are kept as is.
    */
   private String escapeExcel2007Html(String text) {
       if (text == null) {
           return "";
       }
       StringBuilder escaped = new StringBuilder(text.length() + 16);
       for (int i = 0; i < text.length(); i++) {
           char c = text.charAt(i);
           switch (c) {
               case '&':
                   escaped.append("&amp;");
                   break;
               case '<':
                   escaped.append("&lt;");
                   break;
               case '>':
                   escaped.append("&gt;");
                   break;
               case '"':
                   escaped.append("&quot;");
                   break;
               default:
                   escaped.append(c);
           }
       }
       return escaped.toString();
   }


/**
 * Formats a double as plain decimal text without scientific notation and without a
 * redundant fractional part, e.g. {@code 12.0 -> "12"}, {@code 10.05 -> "10.05"},
 * {@code 1.0E10 -> "10000000000"}.
 *
 * @param d the value to format
 * @return the plain decimal representation; {@code "NaN"}, {@code "Infinity"} or
 *         {@code "-Infinity"} for non-finite values
 */
public String doubleToString(double d){
    // BigDecimal cannot represent non-finite values; keep their textual form.
    if (Double.isNaN(d) || Double.isInfinite(d)) {
        return Double.toString(d);
    }
    // Handled explicitly so that both 0.0 and -0.0 print as "0" on every JDK.
    if (d == 0) {
        return "0";
    }
    // valueOf() starts from the shortest decimal form of the double, so no binary
    // rounding noise appears. The previous string slicing truncated values such as
    // 10.05 (to "10") and dropped the exponent of values like 1.0E10.
    return java.math.BigDecimal.valueOf(d).stripTrailingZeros().toPlainString();
}
}

转载于:https://my.oschina.net/ysySuperman/blog/749100

chuizang4830

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
POI 将 Excel 2003、Excel 2007、Word 2003、Word 2007 转换为 HTML

上一篇写的是用 POI 解析 PPT，这一篇是关于 Excel 和 Word 的。其实用 POI 解析 Excel 非常好用。本文参考了网上大神的代码，自己添加、修改了一些东西，都是写代码的苦命兄弟，拿出来共同参考，有意见请大家指正。遇到的问题是：如果用 JSON 把这些 HTML 代码返回给页面是不行的，因为 JSON 不支持 HTML...
复制链接

扫一扫