注意事项:PDF转成图片后需仔细核对。PDF内使用的字体可能不是宋体等常见字体,如果运行环境中没有安装该字体,转出的图片可能出现中文乱码。遇到这种情况时,需要查看原PDF文件中使用的字体,下载后安装在本地或服务器上。
需引入以下Maven依赖(PDFBox的jar包):
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.11</version> </dependency>
情景1:将PDF转成一页长图
public static BufferedImage pdfToImage(InputStream inputStream) { //图像合并使用参数 int width = 0; // 总宽度 int[] singleImgRGB; // 保存一张图片中的RGB数据 int shiftHeight = 0; BufferedImage imageResult = null;//保存每张图片的像素值 try { //利用PdfBox生成图像 PDDocument pdDocument = PDDocument.load(inputStream); PDFTextStripper text = new PDFTextStripper(); String text1 = text.getText(pdDocument); logger.info("读取PDF文字:" + text1); PDFRenderer renderer = new PDFRenderer(pdDocument); //循环每个页码 for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i++) { //dpi参数越大,越清晰 BufferedImage image = renderer.renderImageWithDPI(i, 300, ImageType.RGB); int imageHeight = image.getHeight(); int imageWidth = image.getWidth(); if (i == 0) {//计算高度和偏移量 width = imageWidth;//使用第一张图片宽度; //保存每页图片的像素值 imageResult = new BufferedImage(width, imageHeight * len, BufferedImage.TYPE_INT_RGB); } else { shiftHeight += imageHeight; // 计算偏移高度 } singleImgRGB = image.getRGB(0, 0, width, imageHeight, null, 0, width); imageResult.setRGB(0, shiftHeight, width, imageHeight, singleImgRGB, 0, width); // 写入流中 } pdDocument.close(); } catch (Exception e) { logger.error(" 从PDF转换JPG格式异常!", e); } return imageResult; }
public static void main(String[] args) { InputStream inputStream = null; BufferedImage bufferedImage = null; String url = ""; try { //pdf转img bufferedImage = PDFUtils.pdfToImage(new URL(url)); ByteArrayOutputStream os = new ByteArrayOutputStream(); ImageIO.write(bufferedImage, "png", os); inputStream = new ByteArrayInputStream(os.toByteArray()); } catch (IOException e) { e.printStackTrace(); } }
得到输入流后,就可以将图片下载到本地,或者转存到其他存储空间。
情景2:将PDF文件转为图片,一页PDF对应一张图片,最终将图片打成zip包
public static ByteArrayOutputStream pdfToZip(String fileUrl, String fileName) { ByteArrayOutputStream bos = null; try { DataInputStream inputStream = new DataInputStream(new URL(fileUrl).openStream()); PDDocument pdDocument = PDDocument.load(inputStream); PDFRenderer renderer = new PDFRenderer(pdDocument); bos = new ByteArrayOutputStream(); ZipOutputStream zos = new ZipOutputStream(bos); byte[] bytes = null; //循环每个页码,一页都不能缺,所以内部不try-catch for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i++) { BufferedImage image = renderer.renderImageWithDPI(i, 300); ByteArrayOutputStream temp = new ByteArrayOutputStream(); ImageIO.write(image, "png", temp); zos.putNextEntry(new ZipEntry(fileName + i + ".png")); bytes = temp.toByteArray(); zos.write(bytes, 0, bytes.length); zos.closeEntry(); temp.close(); } zos.close(); pdDocument.close(); } catch (Exception e) { logger.error(" 从PDF转换PNG/ZIP格式异常!", e); } return bos; }
/**
 * Example for scenario 2: converts a PDF to a ZIP of per-page PNG images and exposes the
 * archive bytes as an {@link InputStream}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // NOTE(review): url and fileName are not declared in this snippet; they are assumed
    // to be defined by the surrounding class.
    ByteArrayOutputStream os = pdfToZip(url, fileName);
    if (os == null) {
        // pdfToZip returns null when the conversion failed; the error was already logged.
        return;
    }
    // This stream carries the ZIP bytes and can be saved locally or uploaded elsewhere.
    InputStream inputStream = new ByteArrayInputStream(os.toByteArray());
}