Java PDF 转图片

最新推荐文章于 2024-09-23 16:50:35 发布

weixin_34235105

最新推荐文章于 2024-09-23 16:50:35 发布

阅读量64

点赞数

文章标签： python java

原文链接：https://my.oschina.net/u/3757402/blog/3026251

版权

2019独角兽企业重金招聘Python工程师标准>>>

Maven 依赖：

<!-- Apache PDFBox core artifact -->
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>2.0.8</version>
</dependency>
<!-- Apache PDFBox tools artifact.
     NOTE(review): the sample code below only references PDDocument and PDFRenderer;
     confirm whether this second artifact is actually required. -->
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox-tools</artifactId>
  <version>2.0.8</version>
</dependency>

代码示例：

/** Zero-based index of the first ("home") page of a PDF document. */
private static final int HOME_PAGE_INDEX = 0;

/**
 * Converts the first page of a PDF, supplied as raw bytes, into an image.
 *
 * @param pdf    content of the PDF document
 * @param format image format name used when encoding the rendered page
 * @return encoded image bytes of the first page, or {@code null} if rendering failed
 */
public static byte[] getImageFromPdf(byte[] pdf, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfHomePageToImage(document, format);
}

/**
 * Converts the first page of a PDF file into an image.
 *
 * @param pdf    the PDF file to read
 * @param format image format name used when encoding the rendered page
 * @return encoded image bytes of the first page, or {@code null} if rendering failed
 */
public static byte[] getImageFromPdf(File pdf, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfHomePageToImage(document, format);
}

/**
 * Converts a single page of a PDF file into an image.
 *
 * @param pdf       the PDF file to read
 * @param pageIndex zero-based index of the page to convert
 * @param format    image format name used when encoding the rendered page
 * @return encoded image bytes of the requested page, or {@code null} if rendering failed
 */
public static byte[] getImageFromPdf(File pdf, int pageIndex, String format) {
    // pdfToImageWithIndex iterates while i < endIndex (exclusive end), so the one-page
    // range is [pageIndex, pageIndex + 1). Passing pageIndex twice produced an empty
    // list and get(0) always threw IndexOutOfBoundsException.
    return pdfToImageWithIndex(getFromFile(pdf), pageIndex, pageIndex + 1, format).get(0);
}

/**
 * Converts a single page of a PDF, supplied as raw bytes, into an image.
 *
 * @param pdf       content of the PDF document
 * @param pageIndex zero-based index of the page to convert
 * @param format    image format name used when encoding the rendered page
 * @return encoded image bytes of the requested page, or {@code null} if rendering failed
 */
public static byte[] getImageFromPdf(byte[] pdf, int pageIndex, String format) {
    // pdfToImageWithIndex iterates while i < endIndex (exclusive end), so the one-page
    // range is [pageIndex, pageIndex + 1). Passing pageIndex twice produced an empty
    // list and get(0) always threw IndexOutOfBoundsException.
    return pdfToImageWithIndex(getFromByteArray(pdf), pageIndex, pageIndex + 1, format).get(0);
}

/**
 * Converts a range of pages of a PDF file into images.
 *
 * @param pdf        the PDF file to read
 * @param startIndex zero-based index of the first page to convert
 * @param endIndex   end of the page range; conversion stops before this index
 * @param format     image format name used when encoding each rendered page
 * @return one encoded image per page in the range; an entry is {@code null}
 *         when that page failed to render
 */
public static List<byte[]> getImageFromPdf(File pdf, int startIndex, int endIndex, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfToImageWithIndex(document, startIndex, endIndex, format);
}

/**
 * Converts a range of pages of a PDF, supplied as raw bytes, into images.
 *
 * @param pdf        content of the PDF document
 * @param startIndex zero-based index of the first page to convert
 * @param endIndex   end of the page range; conversion stops before this index
 * @param format     image format name used when encoding each rendered page
 * @return one encoded image per page in the range; an entry is {@code null}
 *         when that page failed to render
 */
public static List<byte[]> getImageFromPdf(byte[] pdf, int startIndex, int endIndex, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfToImageWithIndex(document, startIndex, endIndex, format);
}

/**
 * Renders the pages {@code [startIndex, endIndex)} of a document and always closes the
 * document afterwards, whether rendering succeeded or argument validation failed.
 *
 * @param document   the loaded PDF document; closed before this method returns
 * @param startIndex zero-based index of the first page to render
 * @param endIndex   exclusive end of the page range; may not exceed the page count
 * @param format     image format name used when encoding each rendered page
 * @return one encoded image per page in the range; an entry is {@code null}
 *         when that page failed to render
 * @throws IllegalArgumentException if {@code startIndex} is negative or greater than
 *                                  {@code endIndex}, or {@code endIndex} exceeds the page count
 */
private static List<byte[]> pdfToImageWithIndex(PDDocument document, int startIndex, int endIndex, String format) {
    try {
        if (startIndex < 0) {
            throw new IllegalArgumentException(String.format("The param startIndex cannot be negative, but was [%s]", startIndex));
        }
        if (startIndex > endIndex) {
            throw new IllegalArgumentException("The param startIndex cannot be greater than endIndex");
        }
        int totalPages = pdfTotalPages(document);
        if (endIndex > totalPages) {
            throw new IllegalArgumentException(String.format("The pdf max page index is [%s], But the endIndex you input is [%s]", totalPages, endIndex));
        }
        List<byte[]> pdfImages = new ArrayList<>(endIndex - startIndex);
        for (int i = startIndex; i < endIndex; i++) {
            // i never equals endIndex here, so pdfToImage leaves the document open;
            // it is closed exactly once in the finally block below.
            pdfImages.add(pdfToImage(document, i, endIndex, format));
        }
        return pdfImages;
    } finally {
        try {
            document.close();
        } catch (IOException e) {
            log.warn("IO Exception", e);
        }
    }
}

/**
 * Converts every page of a PDF, supplied as raw bytes, into images.
 *
 * @param pdf    content of the PDF document
 * @param format image format name used when encoding each rendered page
 * @return one encoded image per page, in page order; an entry is {@code null}
 *         when that page failed to render
 */
public static List<byte[]> getImageFromPdfAllPages(byte[] pdf, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfToImageForAllPages(document, format);
}

/**
 * Converts every page of a PDF file into images.
 *
 * @param pdf    the PDF file to read
 * @param format image format name used when encoding each rendered page
 * @return one encoded image per page, in page order; an entry is {@code null}
 *         when that page failed to render
 */
public static List<byte[]> getImageFromPdfAllPages(File pdf, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfToImageForAllPages(document, format);
}

/**
 * Renders every page of a document and always closes the document afterwards.
 *
 * @param document the loaded PDF document; closed before this method returns
 * @param format   image format name used when encoding each rendered page
 * @return one encoded image per page, in page order; an entry is {@code null}
 *         when that page failed to render
 */
private static List<byte[]> pdfToImageForAllPages(PDDocument document, String format) {
    try {
        int totalPages = pdfTotalPages(document);
        List<byte[]> pdfImages = new ArrayList<>(totalPages);
        for (int i = 0; i < totalPages; i++) {
            // i never equals totalPages, so pdfToImage never closes the document itself;
            // previously that meant the document was leaked. It is closed below instead.
            pdfImages.add(pdfToImage(document, i, totalPages, format));
        }
        return pdfImages;
    } finally {
        try {
            document.close();
        } catch (IOException e) {
            log.warn("IO Exception", e);
        }
    }
}

/**
 * Renders the first page of a document to an image.
 *
 * <p>The page index is passed as both the page to render and the end marker of
 * {@code pdfToImage}.
 *
 * @param document the loaded PDF document
 * @param format   image format name used when encoding the rendered page
 * @return encoded image bytes of the first page, or {@code null} if rendering failed
 */
private static byte[] pdfHomePageToImage(PDDocument document, String format) {
    final int firstPage = HOME_PAGE_INDEX;
    return pdfToImage(document, firstPage, firstPage, format);
}


/**
 * Renders one page of a document at 100 DPI and encodes it as an image.
 *
 * <p>Any failure while rendering is logged and reported to the caller as
 * {@code null} instead of being thrown.
 *
 * @param document   the loaded PDF document
 * @param startIndex zero-based index of the page to render
 * @param endIndex   end marker: when it equals {@code startIndex} the document is
 *                   closed after this page has been processed
 * @param format     image format name used when encoding the rendered page
 * @return encoded image bytes, or {@code null} if rendering failed
 */
private static byte[] pdfToImage(PDDocument document, int startIndex, int endIndex, String format) {
    byte[] encoded = null;
    try {
        BufferedImage page = new PDFRenderer(document).renderImageWithDPI(startIndex, 100);
        encoded = imageToBytes(page, format);
    } catch (Exception e) {
        log.warn("Pdf Read Error:", e);
    } finally {
        // Close only once the caller signals that the last requested page was reached.
        boolean reachedEnd = startIndex == endIndex;
        if (reachedEnd) {
            try {
                document.close();
            } catch (IOException e) {
                log.warn("IO Exception", e);
            }
        }
    }
    return encoded;
}

/**
 * Returns the number of pages in a document.
 *
 * @param document the loaded PDF document
 * @return the page count reported by the document
 */
private static int pdfTotalPages(PDDocument document) {
    final int pageCount = document.getNumberOfPages();
    return pageCount;
}

/**
 * Loads a PDF document from a file.
 *
 * @param pdf the PDF file to read
 * @return the loaded document; the caller is responsible for closing it
 * @throws RuntimeException if the file cannot be loaded as a PDF; the underlying
 *                          {@link IOException} is attached as the cause
 */
private static PDDocument getFromFile(File pdf) {
    try {
        return PDDocument.load(pdf);
    } catch (IOException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new RuntimeException(String.format("Load pdf %s, The File must be a pdf.", e.getMessage()), e);
    }
}

/**
 * Loads a PDF document from its raw bytes.
 *
 * @param pdf content of the PDF document
 * @return the loaded document; the caller is responsible for closing it
 * @throws RuntimeException if the bytes cannot be loaded as a PDF; the underlying
 *                          {@link IOException} is attached as the cause
 */
private static PDDocument getFromByteArray(byte[] pdf) {
    try {
        return PDDocument.load(pdf);
    } catch (IOException e) {
        // Chain the cause instead of printing the stack trace to stderr, so the
        // failure details travel with the exception and reach the caller's logging.
        throw new RuntimeException("Load pdf error: The File must be a pdf.", e);
    }
}

/**
 * Encodes a {@link BufferedImage} into a byte array.
 *
 * <p>Encoding problems are logged rather than thrown; in that case the returned
 * array holds whatever was written before the failure, which may be nothing.
 *
 * @param bImage the image to encode
 * @param format image format name, e.g. "gif" or "png"
 * @return the encoded bytes; empty when no writer exists for {@code format}
 */
private static byte[] imageToBytes(BufferedImage bImage, String format) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        // ImageIO.write signals "no writer for this format" by returning false rather
        // than throwing; without this check an unsupported format silently produced
        // an empty array with no trace in the log.
        boolean written = ImageIO.write(bImage, format, out);
        if (!written) {
            log.warn("No ImageIO writer found for format: " + format);
        }
    } catch (IOException e) {
        log.warn("IO Exception", e);
    }
    return out.toByteArray();
}

转载于:https://my.oschina.net/u/3757402/blog/3026251