一、Maven 依赖
<!-- Apache PDFBox core artifact; the utility class below imports PDDocument and PDFRenderer from it. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.29</version>
<exclusions>
<!-- Keeps commons-logging off the classpath. NOTE(review): presumably a JCL bridge
     (e.g. jcl-over-slf4j) is supplied elsewhere in the build; confirm before copying. -->
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
<!-- FontBox, declared explicitly at the same version (2.0.29) as the pdfbox artifact. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.29</version>
</dependency>
二、工具类
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@Slf4j
public class PdfToImageUtil {
private static final Integer DPI = 100;
private static final String IMG_TYPE = "jpg";
private static final ExecutorService executorService = Executors.newFixedThreadPool(10);
public static PDDocument getPdDocument(String pdfFile) throws IOException {
return PDDocument.load(new File(pdfFile), MemoryUsageSetting.setupTempFileOnly());
}
public static PDFRenderer getPdfRenderer(PDDocument document) {
return new PDFRenderer(document);
}
public static void convertPdf2Images(String path, String fileName) {
long start = System.currentTimeMillis();
try (PDDocument document = getPdDocument(path + fileName)) {
PDFRenderer pdfRenderer = getPdfRenderer(document);
int pages = document.getNumberOfPages();
for (int i = 0; i < pages; i++) {
Boolean result = convertSingleImage(pdfRenderer, path, i);
log.info("转换结果:{}", result);
}
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
long end = System.currentTimeMillis();
log.info("串行处理耗时:{}", (end - start));
}
}
public static void convertPdf2ImagesParallel(String path, String fileName) {
long start = System.currentTimeMillis();
try (PDDocument document = getPdDocument(path + fileName)) {
int pages = document.getNumberOfPages();
List<CompletableFuture<Boolean>> futures = new ArrayList<>(10);
for (int i = 0; i < pages; i++) {
int finalI = i;
CompletableFuture<Boolean> future = CompletableFuture
.supplyAsync(() -> convertSingleImage(document, path, finalI), executorService);
futures.add(future);
}
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get(5, TimeUnit.MINUTES);
for (CompletableFuture<Boolean> future : futures) {
Boolean result = future.get();
log.info("任务执行结果为:{}", result);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
long end = System.currentTimeMillis();
log.info("并行处理耗时:{}", (end - start));
}
}
public static Boolean convertSingleImage(PDFRenderer pdfRenderer, String path, int pageIndex) {
try {
BufferedImage bufferedImage = pdfRenderer.renderImageWithDPI(pageIndex, DPI);
ByteArrayOutputStream out = new ByteArrayOutputStream();
ImageIO.write(bufferedImage, IMG_TYPE, out);
byte[] bytes = out.toByteArray();
String imageFileName = String.format("%s.jpg", pageIndex);
return saveImage(bytes, path, imageFileName);
} catch (Exception e) {
log.error("转换图片文件错误:{}", e.getMessage(), e);
throw new RuntimeException(e);
}
}
public static Boolean convertSingleImage(PDDocument document, String path, int pageIndex) {
PDFRenderer pdfRenderer = getPdfRenderer(document);
return convertSingleImage(pdfRenderer, path, pageIndex);
}
public static Boolean saveImage(byte[] imageBytes, String savePath, String fileName) {
try {
BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageBytes));
FileUtils.createDirectoryIfNotExists(savePath);
File directory = new File(savePath);
File file = new File(directory, fileName);
return ImageIO.write(image, IMG_TYPE, file);
} catch (IOException e) {
log.error("保存文件错误:{}", e.getMessage(), e);
return false;
}
}
private static void convert1() {
String path = "D:\\myself\\docs\\PDF\\";
String pdf = "demo.pdf";
convertPdf2Images(path, pdf);
}
private static void convert2() {
String path = "D:\\myself\\docs\\PDF\\";
String pdf = "demo - 副本.pdf";
convertPdf2ImagesParallel(path, pdf);
}
public static void main(String[] args) {
convert1();
convert2();
}
}
三、测试结果
2024-08-01 11:13:47 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:13:52 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:13:58 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:05 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:08 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:12 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:16 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:16 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():77 - 转换结果:true
2024-08-01 11:14:16 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2Images():83 - 串行处理耗时:33431
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():113 - 任务执行结果为:true
2024-08-01 11:14:23 [INFO ] [main] - [] - com.summer.toolkit.util.PdfToImageUtil.convertPdf2ImagesParallel():120 - 并行处理耗时:6239