docx、ppt、xls、pdf文件转html(转)

此博客转自:https://blog.csdn.net/fyhx2010/article/details/69569310

场景:

后台上传的文档要在前端app上在线预览


解决办法:

将文档转成html用于前端显示


maven引入

[plain]  view plain  copy
  1. <dependency>  
  2.             <groupId>fr.opensagres.xdocreport</groupId>  
  3.             <artifactId>fr.opensagres.xdocreport.document</artifactId>  
  4.             <version>1.0.5</version>  
  5.         </dependency>  
  6.         <dependency>    
  7.             <groupId>fr.opensagres.xdocreport</groupId>    
  8.             <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>    
  9.             <version>1.0.5</version>    
  10.         </dependency>  
  11.         <dependency>  
  12.             <groupId>org.apache.poi</groupId>  
  13.             <artifactId>poi-ooxml</artifactId>  
  14.             <version>3.9</version>  
  15.         </dependency>  
  16.         <dependency>  
  17.             <groupId>org.apache.poi</groupId>  
  18.             <artifactId>poi-scratchpad</artifactId>  
  19.             <version>3.9</version>  
  20.         </dependency>  


具体代码:

1、docx转html

[java]  view plain  copy
  1. /** 
  2.      * docx文件转html 
  3.      * @param tempContextUrl 项目访问名 
  4.      * @return 
  5.      */  
  6.     public int Word2007ToHtml(String tempContextUrl) {  
  7.         int rv = 0;  
  8.         try {  
  9.             String path =  presentationDto.getWordPath();  
  10.             //word路径  
  11.             String wordPath = path.substring(0, path.indexOf("upload")+6);  
  12.             //word文件名  
  13.             String wordName = path.substring(path.lastIndexOf(File.separator)+1,path.lastIndexOf("."));  
  14.             //后缀  
  15.             String suffix = path.substring(path.lastIndexOf("."));  
  16.             //生成html路径  
  17.             String htmlPath = wordPath + File.separator + System.currentTimeMillis() + "_show" + File.separator;  
  18.             //生成html文件名  
  19.             String htmlName = System.currentTimeMillis() + ".html";  
  20.             //图片路径  
  21.             String imagePath = htmlPath + "image" + File.separator;  
  22.                
  23.             //判断html文件是否存在  
  24.             File htmlFile = new File(htmlPath + htmlName);  
  25.                    
  26.             //word文件  
  27.             File wordFile = new File(wordPath + File.separator + wordName + suffix);   
  28.                
  29.             // 1) 加载word文档生成 XWPFDocument对象   
  30.             InputStream in = new FileInputStream(wordFile);   
  31.             XWPFDocument document = new XWPFDocument(in);   
  32.    
  33.             // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)   
  34.             File imgFolder = new File(imagePath);  
  35.             XHTMLOptions options = XHTMLOptions.create();  
  36.             options.setExtractor(new FileImageExtractor(imgFolder));  
  37.             //html中图片的路径 相对路径   
  38.             options.URIResolver(new BasicURIResolver("image"));  
  39.             options.setIgnoreStylesIfUnused(false);   
  40.             options.setFragment(true);   
  41.                
  42.             // 3) 将 XWPFDocument转换成XHTML  
  43.             //生成html文件上级文件夹  
  44.             File folder = new File(htmlPath);  
  45.             if(!folder.exists()){   
  46.               folder.mkdirs();   
  47.             }  
  48.             OutputStream out = new FileOutputStream(htmlFile);   
  49.             XHTMLConverter.getInstance().convert(document, out, options);  
  50.   
  51.             // 4) 转换为项目访问路径  
  52.             String absolutePath = htmlFile.getAbsolutePath();  
  53.             htmlPath = tempContextUrl + absolutePath.substring(absolutePath.indexOf("upload"));  
  54.             presentationDto.setHtmlPath(htmlPath);  
  55.         } catch (FileNotFoundException e) {  
  56.             e.printStackTrace();  
  57.             return rv;   
  58.         } catch (XWPFConverterException e) {  
  59.             e.printStackTrace();  
  60.             return rv;   
  61.         } catch (IOException e) {  
  62.             e.printStackTrace();  
  63.             return rv;   
  64.         }  
  65.         rv = 1;  
  66.         return rv;   
  67.     }  

2、xls转html

[java]  view plain  copy
  1. private int xlsToHtml(String tempContextUrl){  
  2.         int rv = 0;  
  3.         String path =  presentationDto.getWordPath();  
  4.         //word路径  
  5.         String wordPath = path.substring(0, path.indexOf("upload")+6) + File.separator;  
  6.         //word文件名  
  7.         String wordName = path.substring(path.lastIndexOf(File.separator)+1);  
  8.         try {  
  9.             InputStream input=new FileInputStream(wordPath+wordName);  
  10.             HSSFWorkbook excelBook=new HSSFWorkbook(input);  
  11.             ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );  
  12.             excelToHtmlConverter.processWorkbook(excelBook);  
  13.             List pics = excelBook.getAllPictures();  
  14.             if (pics != null) {  
  15.                 for (int i = 0; i < pics.size(); i++) {  
  16.                     Picture pic = (Picture) pics.get (i);  
  17.                     try {  
  18.                         pic.writeImageContent (new FileOutputStream (wordPath + pic.suggestFullFileName() ) );  
  19.                     } catch (FileNotFoundException e) {  
  20.                         e.printStackTrace();  
  21.                     }  
  22.                 }  
  23.             }  
  24.             Document htmlDocument =excelToHtmlConverter.getDocument();  
  25.             ByteArrayOutputStream outStream = new ByteArrayOutputStream();  
  26.             DOMSource domSource = new DOMSource (htmlDocument);  
  27.             StreamResult streamResult = new StreamResult (outStream);  
  28.             TransformerFactory tf = TransformerFactory.newInstance();  
  29.             Transformer serializer = tf.newTransformer();  
  30.             serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");  
  31.             serializer.setOutputProperty (OutputKeys.INDENT, "yes");  
  32.             serializer.setOutputProperty (OutputKeys.METHOD, "html");  
  33.             serializer.transform (domSource, streamResult);  
  34.             outStream.close();  
  35.       
  36.             String content = new String (outStream.toByteArray(),"utf-8");  
  37.       
  38.             String uuid = UidUtil.generateUUID();  
  39.             FileUtils.writeStringToFile(new File(wordPath, uuid+".html"), content, "utf-8");  
  40.             presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html");  
  41.         } catch (Exception e) {  
  42.             e.printStackTrace();  
  43.             return rv;   
  44.         }  
  45.         rv = 1;  
  46.         return rv;   
  47.     }  

3、ppt转html

其实只是ppt转图片,有了图片后放到页面上去显示。

[java]  view plain  copy
  1. /** 
  2.      * ppt转html 
  3.      * @param tempContextUrl 
  4.      * @return 
  5.      */  
  6.     private int pptToHtml(String tempContextUrl){  
  7.         int rv = 0;  
  8.         String path = presentationDto.getWordPath();  
  9.         //word路径  
  10.         String wordPath = path.substring(0, path.indexOf("upload")+6);  
  11.         //文件夹名  
  12.         String folderName = UidUtil.generateUUID();  
  13.         List<String> imgList = new ArrayList<String>();  
  14.         File file = new File(path);  
  15.           
  16.         File folder = new File(wordPath + File.separator + folderName);  
  17.         try {     
  18.             folder.mkdirs();  
  19.             FileInputStream is = new FileInputStream(file);     
  20.             SlideShow ppt = new SlideShow(is);     
  21.             is.close();     
  22.             Dimension pgsize = ppt.getPageSize();     
  23.             org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();     
  24.             for (int i = 0; i < slide.length; i++) {  
  25.                 TextRun[] truns = slide[i].getTextRuns();        
  26.                 for ( int k=0;k<truns.length;k++){        
  27.                    RichTextRun[] rtruns = truns[k].getRichTextRuns();        
  28.                   for(int l=0;l<rtruns.length;l++){        
  29.                         rtruns[l].setFontIndex(1);        
  30.                         rtruns[l].setFontName("宋体");    
  31.                    }        
  32.                 }        
  33.                 BufferedImage img = new BufferedImage(pgsize.width,pgsize.height, BufferedImage.TYPE_INT_RGB);     
  34.                 Graphics2D graphics = img.createGraphics();     
  35.                 graphics.setPaint(Color.BLUE);     
  36.                 graphics.fill(new Rectangle2D.Float(00, pgsize.width, pgsize.height));     
  37.                 slide[i].draw(graphics);     
  38.   
  39.                 // 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径     
  40.                 String imgName = File.separator + folderName + File.separator +"pict_"+ (i + 1) + ".jpeg";  
  41.                   
  42.                 FileOutputStream out = new FileOutputStream(wordPath + imgName);     
  43.                 javax.imageio.ImageIO.write(img, "jpeg", out);  
  44.                 out.close();     
  45.                   
  46.                 imgList.add(File.separator + "upload" + imgName);  
  47.             }  
  48.         } catch (FileNotFoundException e) {  
  49.             e.printStackTrace();  
  50.             return rv;  
  51.         } catch (IOException e) {  
  52.             e.printStackTrace();  
  53.             return rv;  
  54.         }  
  55.         rv = createHtml(wordPath,imgList, tempContextUrl);  
  56.         return rv;  
  57.     }  
  58.       
  59.     /** 
  60.      * ppt转html时生成html 
  61.      * @param wordPath  upload根目录 
  62.      * @param imgList   所有幻灯片路径 
  63.      * @param tempContextUrl    项目访问路径 
  64.      * @return 
  65.      */  
  66.     private int createHtml(String wordPath,List<String> imgList,String tempContextUrl){  
  67.         int rv = 0;  
  68.         StringBuilder sb = new StringBuilder("<!doctype html><html><head><meta charset='utf-8'><title>无标题文档</title></head><body>");  
  69.         if (imgList != null && !imgList.isEmpty()) {  
  70.             for (String img : imgList) {  
  71.                 sb.append("<img src='" + img + "' /><br>");  
  72.             }  
  73.         }  
  74.         sb.append("</body></html>");  
  75.           
  76.         String uuid = UidUtil.generateUUID();  
  77.         try {  
  78.             File file = new File(wordPath + File.separator + uuid + ".html");  
  79.             BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));  
  80.             bufferedWriter.write(sb.toString());  
  81.             bufferedWriter.close();  
  82.         } catch (IOException e) {  
  83.             e.printStackTrace();  
  84.             return rv;  
  85.         }  
  86.         presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html");  
  87.         return 1;  
  88.     }  

4、pdf转html

原理与ppt转html相同:先把每一页转成图片,再生成html页面来显示这些图片。

[java]  view plain  copy
  1. /** 
  2.      * pdf转html 
  3.      * @param tempContextUrl 
  4.      * @return 
  5.      */  
  6.     private int pdfToHtml(String tempContextUrl){  
  7.         int rv = 0;  
  8.         String path = presentationDto.getWordPath();  
  9.         //word路径  
  10.         String wordPath = path.substring(0, path.indexOf("upload")+6);  
  11.         //文件夹名  
  12.         String folderName = UidUtil.generateUUID();  
  13.         List<String> imgList = new ArrayList<String>();  
  14.         File file = new File(path);  
  15.         try {  
  16.             PDDocument doc = PDDocument.load(path);  
  17.             int pageCount = doc.getPageCount();   
  18.             System.out.println(pageCount);   
  19.             List pages = doc.getDocumentCatalog().getAllPages();   
  20.             for(int i=0;i<pages.size();i++){  
  21.                 PDPage page = (PDPage)pages.get(i);   
  22.                 BufferedImage image = page.convertToImage();   
  23.                 Iterator iter = ImageIO.getImageWritersBySuffix("jpg");   
  24.                 ImageWriter writer = (ImageWriter)iter.next();   
  25.                 String imgName = File.separator + folderName + File.separator +i+".jpg";  
  26.                 File folder = new File(wordPath + File.separator + folderName); //先创建文件夹  
  27.                 folder.mkdirs();  
  28.                 File outFile = new File(wordPath + imgName);    //再创建文件  
  29.                 imgList.add(File.separator + "upload" + imgName);  
  30.                 outFile.createNewFile();  
  31.                 FileOutputStream out = new FileOutputStream(outFile);   
  32.                 ImageOutputStream outImage = ImageIO.createImageOutputStream(out);   
  33.                 writer.setOutput(outImage);   
  34.                 writer.write(new IIOImage(image,null,null));   
  35.             }  
  36.             doc.close();  
  37.         } catch (FileNotFoundException e) {  
  38.             e.printStackTrace();  
  39.             return rv;  
  40.         } catch (IOException e) {  
  41.             e.printStackTrace();  
  42.             return rv;  
  43.         }  
  44.         rv = createHtml(wordPath, imgList, tempContextUrl);  
  45.         return 1;  
  46.     }  


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值