docx、ppt、xls、pdf文件转html


场景:

后台上传的文档要在前端app上在线预览


解决办法:

将文档转成html用于前端显示


maven引入(以下为docx、xls、ppt转换所需依赖;pdf转换另需引入 org.apache.pdfbox:pdfbox 的1.x版本,xls转换中用到的FileUtils来自commons-io)

<dependency>
		    <groupId>fr.opensagres.xdocreport</groupId>
		    <artifactId>fr.opensagres.xdocreport.document</artifactId>
		    <version>1.0.5</version>
		</dependency>
		<dependency>  
		    <groupId>fr.opensagres.xdocreport</groupId>  
		    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>  
		    <version>1.0.5</version>  
		</dependency>
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-ooxml</artifactId>
		    <version>3.9</version>
		</dependency>
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-scratchpad</artifactId>
		    <version>3.9</version>
		</dependency>


具体代码:

1、docx转html

/**
	 * Converts the .docx file referenced by {@code presentationDto.getWordPath()} into an
	 * XHTML fragment and records the resulting access URL through
	 * {@code presentationDto.setHtmlPath(...)}.
	 *
	 * <p>The source document is looked up directly inside the directory that ends with
	 * "upload". The HTML file is written to a new {@code <timestamp>_show} sub-directory of
	 * that directory, and embedded pictures are extracted to its {@code image} sub-folder.
	 *
	 * @param tempContextUrl base URL of the web application; the part of the generated
	 *                       file path starting at "upload" is appended to it
	 * @return 1 when the conversion succeeded, 0 when the stored path is missing, does not
	 *         contain "upload", or an I/O or conversion error occurred
	 */
	public int Word2007ToHtml(String tempContextUrl) {
		String path = presentationDto.getWordPath();
		// Without this guard indexOf(...) == -1 would silently yield a bogus 5-character directory.
		if (path == null || path.indexOf("upload") < 0) {
			return 0;
		}

		InputStream in = null;
		OutputStream out = null;
		try {
			// Directory that ends with "upload"
			String wordPath = path.substring(0, path.indexOf("upload") + 6);
			// File name without extension
			String wordName = path.substring(path.lastIndexOf(File.separator) + 1, path.lastIndexOf("."));
			// Extension, including the leading dot
			String suffix = path.substring(path.lastIndexOf("."));

			// One timestamp names both the output folder and the HTML file
			long timestamp = System.currentTimeMillis();
			String htmlPath = wordPath + File.separator + timestamp + "_show" + File.separator;
			String htmlName = timestamp + ".html";
			String imagePath = htmlPath + "image" + File.separator;

			// Target HTML file (created below by the FileOutputStream)
			File htmlFile = new File(htmlPath + htmlName);
			// Source document, expected directly under the upload directory
			File wordFile = new File(wordPath + File.separator + wordName + suffix);

			// 1) Load the document
			in = new FileInputStream(wordFile);
			XWPFDocument document = new XWPFDocument(in);

			// 2) Configure the XHTML conversion: where pictures are written and how
			//    they are referenced (relative "image" folder) from the HTML
			File imgFolder = new File(imagePath);
			XHTMLOptions options = XHTMLOptions.create();
			options.setExtractor(new FileImageExtractor(imgFolder));
			options.URIResolver(new BasicURIResolver("image"));
			options.setIgnoreStylesIfUnused(false);
			options.setFragment(true);

			// 3) Convert, creating the output folder first if necessary
			File folder = new File(htmlPath);
			if (!folder.exists()) {
				folder.mkdirs();
			}
			out = new FileOutputStream(htmlFile);
			XHTMLConverter.getInstance().convert(document, out, options);

			// 4) Translate the file-system path into the URL served by the application
			// NOTE(review): on Windows the URL will contain backslashes from File.separator - confirm this is acceptable to clients.
			String absolutePath = htmlFile.getAbsolutePath();
			htmlPath = tempContextUrl + absolutePath.substring(absolutePath.indexOf("upload"));
			presentationDto.setHtmlPath(htmlPath);
		} catch (XWPFConverterException e) {
			e.printStackTrace();
			return 0;
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		} finally {
			// Always release both file handles, even when the conversion failed
			if (out != null) {
				try {
					out.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (in != null) {
				try {
					in.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return 1;
	}

2、xls转html

/**
	 * Converts the .xls file referenced by {@code presentationDto.getWordPath()} into an HTML
	 * file inside the directory that ends with "upload" and records the resulting access URL
	 * through {@code presentationDto.setHtmlPath(...)}.
	 *
	 * @param tempContextUrl base URL of the web application, prepended to "upload/&lt;uuid&gt;.html"
	 * @return 1 when the conversion succeeded, 0 when the stored path is missing, does not
	 *         contain "upload", or any error occurred
	 */
	private int xlsToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Without this guard indexOf(...) == -1 would silently yield a bogus directory.
		if (path == null || path.indexOf("upload") < 0) {
			return 0;
		}
		// Directory that ends with "upload", with a trailing separator
		String wordPath = path.substring(0, path.indexOf("upload") + 6) + File.separator;
		// File name including extension
		String wordName = path.substring(path.lastIndexOf(File.separator) + 1);

		InputStream input = null;
		try {
			input = new FileInputStream(wordPath + wordName);
			HSSFWorkbook excelBook = new HSSFWorkbook(input);
			ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			excelToHtmlConverter.processWorkbook(excelBook);

			// Dump embedded pictures next to the workbook.
			// NOTE(review): HSSFWorkbook#getAllPictures is documented to return HSSFPictureData
			// elements; verify which Picture type is imported here. Elements of another type
			// are skipped instead of aborting the whole conversion with a ClassCastException.
			List<?> pics = excelBook.getAllPictures();
			if (pics != null) {
				for (Object item : pics) {
					if (!(item instanceof Picture)) {
						continue;
					}
					Picture pic = (Picture) item;
					try {
						FileOutputStream picOut = new FileOutputStream(wordPath + pic.suggestFullFileName());
						try {
							pic.writeImageContent(picOut);
						} finally {
							picOut.close();
						}
					} catch (FileNotFoundException e) {
						// A picture that cannot be written does not abort the conversion
						e.printStackTrace();
					}
				}
			}

			// Serialize the generated DOM as UTF-8 HTML
			Document htmlDocument = excelToHtmlConverter.getDocument();
			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

			String content = new String(outStream.toByteArray(), "utf-8");

			String uuid = UidUtil.generateUUID();
			FileUtils.writeStringToFile(new File(wordPath, uuid + ".html"), content, "utf-8");
			presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid + ".html");
		} catch (Exception e) {
			e.printStackTrace();
			return 0;
		} finally {
			// Release the workbook file handle on every path
			if (input != null) {
				try {
					input.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return 1;
	}

3、ppt转html

其实只是ppt转图片,有了图片后放到页面上去显示。

/**
	 * Renders every slide of the .ppt file referenced by
	 * {@code presentationDto.getWordPath()} to a JPEG image and then generates an HTML page
	 * that shows those images (see {@code createHtml}).
	 *
	 * <p>The images are written to a new UUID-named folder inside the directory that ends
	 * with "upload".
	 *
	 * @param tempContextUrl base URL of the web application, forwarded to {@code createHtml}
	 * @return the result of {@code createHtml} (1 on success), or 0 when the stored path is
	 *         missing, does not contain "upload", or an I/O error occurred
	 */
	private int pptToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Without this guard indexOf(...) == -1 would silently yield a bogus directory.
		if (path == null || path.indexOf("upload") < 0) {
			return 0;
		}
		// Directory that ends with "upload"
		String wordPath = path.substring(0, path.indexOf("upload") + 6);
		// Folder that receives the slide images
		String folderName = UidUtil.generateUUID();
		List<String> imgList = new ArrayList<String>();

		File folder = new File(wordPath + File.separator + folderName);
		try {
			folder.mkdirs();

			// Load the slide show; the stream is closed even if parsing fails
			SlideShow ppt;
			FileInputStream is = new FileInputStream(new File(path));
			try {
				ppt = new SlideShow(is);
			} finally {
				is.close();
			}

			Dimension pgsize = ppt.getPageSize();
			org.apache.poi.hslf.model.Slide[] slides = ppt.getSlides();
			for (int i = 0; i < slides.length; i++) {
				// Set an explicit font on every text run before rendering
				// (presumably to avoid garbled CJK text - confirm)
				TextRun[] truns = slides[i].getTextRuns();
				for (int k = 0; k < truns.length; k++) {
					RichTextRun[] rtruns = truns[k].getRichTextRuns();
					for (int l = 0; l < rtruns.length; l++) {
						rtruns[l].setFontIndex(1);
						rtruns[l].setFontName("宋体");
					}
				}

				// Paint the slide onto an off-screen image
				BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
				Graphics2D graphics = img.createGraphics();
				try {
					graphics.setPaint(Color.BLUE);
					graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
					slides[i].draw(graphics);
				} finally {
					graphics.dispose();
				}

				// Image path relative to the upload directory; the format is fixed to JPEG
				String imgName = File.separator + folderName + File.separator + "pict_" + (i + 1) + ".jpeg";

				FileOutputStream out = new FileOutputStream(wordPath + imgName);
				try {
					javax.imageio.ImageIO.write(img, "jpeg", out);
				} finally {
					out.close();
				}

				// NOTE(review): this value ends up in an <img src>; on Windows it contains backslashes - confirm.
				imgList.add(File.separator + "upload" + imgName);
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		}
		return createHtml(wordPath, imgList, tempContextUrl);
	}
	
	/**
	 * Writes an HTML page that displays the given images one below the other and records
	 * its access URL through {@code presentationDto.setHtmlPath(...)}.
	 *
	 * <p>The page is stored as {@code <uuid>.html} directly inside {@code wordPath}.
	 *
	 * @param wordPath	upload root directory in which the HTML file is created
	 * @param imgList	image paths used verbatim as {@code src} attributes; may be null or empty
	 * @param tempContextUrl	base URL of the web application, prepended to "upload/&lt;uuid&gt;.html"
	 * @return 1 when the file was written, 0 when an I/O error occurred
	 */
	private int createHtml(String wordPath,List<String> imgList,String tempContextUrl){
		StringBuilder sb = new StringBuilder("<!doctype html><html><head><meta charset='utf-8'><title>无标题文档</title></head><body>");
		if (imgList != null && !imgList.isEmpty()) {
			for (String img : imgList) {
				sb.append("<img src='").append(img).append("' /><br>");
			}
		}
		sb.append("</body></html>");

		String uuid = UidUtil.generateUUID();
		try {
			File file = new File(wordPath + File.separator + uuid + ".html");
			BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
			try {
				bufferedWriter.write(sb.toString());
			} finally {
				// close() also flushes; a failure here is still reported as 0 by the catch below
				bufferedWriter.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
			return 0;
		}
		presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html");
		return 1;
	}

4、pdf转html

原理与ppt转html相同:先把pdf的每一页渲染成图片,再生成引用这些图片的html页面。

/**
	 * Renders every page of the PDF file referenced by
	 * {@code presentationDto.getWordPath()} to a JPEG image and then generates an HTML page
	 * that shows those images (see {@code createHtml}).
	 *
	 * <p>The images are written as {@code <pageIndex>.jpg} to a new UUID-named folder inside
	 * the directory that ends with "upload".
	 *
	 * @param tempContextUrl base URL of the web application, forwarded to {@code createHtml}
	 * @return the result of {@code createHtml} (1 on success), or 0 when the stored path is
	 *         missing, does not contain "upload", or an I/O error occurred
	 */
	private int pdfToHtml(String tempContextUrl){
		String path = presentationDto.getWordPath();
		// Without this guard indexOf(...) == -1 would silently yield a bogus directory.
		if (path == null || path.indexOf("upload") < 0) {
			return 0;
		}
		// Directory that ends with "upload"
		String wordPath = path.substring(0, path.indexOf("upload") + 6);
		// Folder that receives the page images
		String folderName = UidUtil.generateUUID();
		List<String> imgList = new ArrayList<String>();

		PDDocument doc = null;
		try {
			doc = PDDocument.load(path);
			List<?> pages = doc.getDocumentCatalog().getAllPages();

			// Create the target folder once, before any page is written
			File folder = new File(wordPath + File.separator + folderName);
			folder.mkdirs();

			for (int i = 0; i < pages.size(); i++) {
				PDPage page = (PDPage) pages.get(i);
				BufferedImage image = page.convertToImage();

				String imgName = File.separator + folderName + File.separator + i + ".jpg";
				Iterator<ImageWriter> iter = ImageIO.getImageWritersBySuffix("jpg");
				ImageWriter writer = iter.next();
				FileOutputStream out = new FileOutputStream(new File(wordPath + imgName));
				try {
					ImageOutputStream outImage = ImageIO.createImageOutputStream(out);
					try {
						writer.setOutput(outImage);
						writer.write(new IIOImage(image, null, null));
					} finally {
						// Closing flushes any buffered image data into the file stream
						outImage.close();
					}
				} finally {
					writer.dispose();
					out.close();
				}

				// NOTE(review): this value ends up in an <img src>; on Windows it contains backslashes - confirm.
				imgList.add(File.separator + "upload" + imgName);
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException
			e.printStackTrace();
			return 0;
		} finally {
			// Release the PDF even when rendering failed
			if (doc != null) {
				try {
					doc.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		// Propagate the HTML generation result instead of always reporting success
		return createHtml(wordPath, imgList, tempContextUrl);
	}



  • 6
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 8
    评论
好的,以下是一个简单的Java程序示例,使用Apache POI和Apache PDFBox库将Office文件(Word、Excel和PowerPoint)换为PDF文件: ```java import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; public class OfficeToPDFConverter { public static void main(String[] args) throws IOException, InvalidFormatException { String inputFile = "input.docx"; String outputFile = "output.pdf"; convertToPdf(inputFile, outputFile); } public static void convertToPdf(String inputFile, String outputFile) throws IOException, InvalidFormatException { File inputFileObj = new File(inputFile); File outputFileObj = new File(outputFile); String fileExtension = getFileExtension(inputFileObj); if (fileExtension.equalsIgnoreCase("docx")) { XWPFDocument document = new XWPFDocument(WorkbookFactory.create(inputFileObj)); PDDocument pdfDoc = new PDDocument(); PDPage page = new PDPage(); try (PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, page)) { pdfDoc.addPage(page); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.newLineAtOffset(20, 750); for (XWPFParagraph para : document.getParagraphs()) { contentStream.showText(para.getText()); contentStream.newLine(); } contentStream.endText(); } pdfDoc.save(outputFileObj); 
pdfDoc.close(); document.close(); } else if (fileExtension.equalsIgnoreCase("xlsx")) { XSSFWorkbook workbook = new XSSFWorkbook(inputFileObj); PDDocument pdfDoc = new PDDocument(); PDPage page = new PDPage(); try (PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, page)) { pdfDoc.addPage(page); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.newLineAtOffset(20, 750); for (int i = 0; i < workbook.getNumberOfSheets(); i++) { contentStream.showText(workbook.getSheetAt(i).getSheetName()); contentStream.newLine(); } contentStream.endText(); } pdfDoc.save(outputFileObj); pdfDoc.close(); workbook.close(); } else if (fileExtension.equalsIgnoreCase("pptx")) { XMLSlideShow ppt = new XMLSlideShow(WorkbookFactory.create(inputFileObj)); PDDocument pdfDoc = new PDDocument(); PDPage page = new PDPage(); try (PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, page)) { pdfDoc.addPage(page); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.newLineAtOffset(20, 750); for (XSLFSlide slide : ppt.getSlides()) { contentStream.showText(slide.getTitle()); contentStream.newLine(); } contentStream.endText(); } pdfDoc.save(outputFileObj); pdfDoc.close(); ppt.close(); } else if (fileExtension.equalsIgnoreCase("xls")) { HSSFWorkbook workbook = new HSSFWorkbook(inputFileObj); PDDocument pdfDoc = new PDDocument(); PDPage page = new PDPage(); try (PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, page)) { pdfDoc.addPage(page); contentStream.beginText(); contentStream.setFont(PDType1Font.HELVETICA_BOLD, 12); contentStream.newLineAtOffset(20, 750); for (int i = 0; i < workbook.getNumberOfSheets(); i++) { contentStream.showText(workbook.getSheetAt(i).getSheetName()); contentStream.newLine(); } contentStream.endText(); } pdfDoc.save(outputFileObj); pdfDoc.close(); workbook.close(); } } private static String getFileExtension(File file) { String fileName = 
file.getName(); if (fileName.lastIndexOf(".") != -1 && fileName.lastIndexOf(".") != 0) { return fileName.substring(fileName.lastIndexOf(".") + 1); } else { return ""; } } } ``` 在此示例中,我们创建了一个名为`OfficeToPDFConverter`的Java类,该类使用Apache POI和Apache PDFBox库将Office文件(Word、Excel和PowerPoint)换为PDF文件。在`main`方法中,我们调用`convertToPdf`方法,该方法接受输入文件路径和输出文件路径作为参数。在`convertToPdf`方法中,我们首先获取输入文件的扩展名,然后根据文件类型使用不同的POI库加载文件。然后,我们使用PDFBox库来创建一个PDF文档,并将Office文件的内容写入PDF页面。最后,我们将PDF文档保存到输出文件中。 请注意,在使用该程序之前,您需要下载并导入Apache POI和Apache PDFBox库。
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值