1. 导入 Maven 依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.8</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.3.0</version>
</dependency>
2.工具类
/**
 * Static helpers built on Apache PDFBox for inspecting PDF files and
 * rendering their pages to images.
 *
 * <p>Every method opens the PDF itself and closes it again before returning,
 * including when an exception is thrown.
 */
public class PdfUtil {
    private static final Logger logger = LoggerFactory.getLogger(PdfUtil.class);

    /** Resolution used by {@link #PDFToImg}; higher values are sharper but slower to render. */
    private static final int RENDER_DPI = 100;

    /** Resolution used by {@link #getThumbnails}; deliberately low to produce a small image. */
    private static final int THUMBNAIL_DPI = 30;

    /**
     * Returns the total number of pages in a PDF file.
     *
     * @param filePath path of the PDF file
     * @return the page count reported by PDFBox
     * @throws IOException if the file cannot be read or parsed
     */
    public static int getNumberOfPages(String filePath) throws IOException {
        try (PDDocument pdDocument = getPDDocument(filePath)) {
            return pdDocument.getNumberOfPages();
        }
    }

    /**
     * Extracts the text content of a PDF file.
     *
     * @param filePath path of the PDF file
     * @return the text produced by {@code PDFTextStripper} for the whole document
     * @throws IOException if the file cannot be read or parsed
     */
    public static String getContent(String filePath) throws IOException {
        // Load through PDDocument.load(File): the file is only read, so it must
        // not be opened in "rw" mode (that fails on read-only files).
        try (PDDocument pdDocument = getPDDocument(filePath)) {
            return new PDFTextStripper().getText(pdDocument);
        }
    }

    /**
     * Renders the first page of a PDF file as a low-resolution PNG thumbnail.
     *
     * @param filePath path of the PDF file
     * @param outPath  path of the PNG image to create
     * @throws IOException if the PDF cannot be read, no PNG writer is available,
     *                     or the image cannot be written
     */
    public static void getThumbnails(String filePath, String outPath) throws IOException {
        try (PDDocument pdDocument = getPDDocument(filePath)) {
            PDFRenderer renderer = new PDFRenderer(pdDocument);
            // Page index 0 is the first page.
            BufferedImage thumbnail = renderer.renderImageWithDPI(0, THUMBNAIL_DPI, ImageType.RGB);
            try {
                writePng(thumbnail, outPath);
            } finally {
                thumbnail.flush();
            }
        }
    }

    /**
     * Renders a single page of a PDF file and writes it to the given stream.
     *
     * <p>This method is best-effort: rendering or I/O failures are logged and
     * not rethrown, and an out-of-range page index writes nothing. The caller
     * remains responsible for closing {@code sos}.
     *
     * @param sos     stream that receives the encoded image; not closed by this method
     * @param fileUrl path of the PDF file
     * @param page    zero-based index of the page to render
     * @param imgType informal image format name understood by ImageIO, e.g. {@code jpg} or {@code png}
     * @throws IOException declared for compatibility with existing callers
     */
    public static void PDFToImg(OutputStream sos, String fileUrl, int page, String imgType) throws IOException {
        try (PDDocument pdDocument = getPDDocument(fileUrl)) {
            int pages = pdDocument.getNumberOfPages();
            // Page indexes are zero-based, so the last valid index is pages - 1.
            if (page < 0 || page >= pages) {
                logger.warn("Page index {} is out of range for {} ({} pages); nothing written",
                        page, fileUrl, pages);
                return;
            }
            BufferedImage image = new PDFRenderer(pdDocument).renderImageWithDPI(page, RENDER_DPI);
            // ImageIO.write returns false (without throwing) when no writer matches the format name.
            if (!ImageIO.write(image, imgType, sos)) {
                logger.warn("No ImageIO writer found for image type {}; nothing written", imgType);
            }
        } catch (IOException | RuntimeException e) {
            // Pass the exception itself so the stack trace is kept in the log.
            logger.error("Failed to render page {} of {} as {}", page, fileUrl, imgType, e);
        }
    }

    /**
     * Opens a PDF file as a {@code PDDocument}. The caller must close the returned document.
     *
     * @param fileUrl path of the PDF file
     * @return the loaded document
     * @throws IOException if the file cannot be read or parsed
     */
    private static PDDocument getPDDocument(String fileUrl) throws IOException {
        // Loading from a File leaves no separate InputStream open for the caller to leak.
        return PDDocument.load(new File(fileUrl));
    }

    /** Encodes {@code image} as PNG into the file at {@code outPath}, releasing all writer resources. */
    private static void writePng(BufferedImage image, String outPath) throws IOException {
        Iterator<ImageWriter> writers = ImageIO.getImageWritersBySuffix("png");
        if (!writers.hasNext()) {
            throw new IOException("No ImageIO writer available for suffix png");
        }
        ImageWriter writer = writers.next();
        // Closing the ImageOutputStream does not close the stream it wraps,
        // so the FileOutputStream is managed as its own resource.
        try (OutputStream fileOut = new FileOutputStream(outPath);
             ImageOutputStream imageOut = ImageIO.createImageOutputStream(fileOut)) {
            if (imageOut == null) {
                throw new IOException("Unable to create an image output stream for " + outPath);
            }
            writer.setOutput(imageOut);
            writer.write(new IIOImage(image, null, null));
            imageOut.flush();
        } finally {
            writer.dispose();
        }
    }
}
3.测试
/**
 * Prints the page count of a sample PDF and renders up to its first three
 * pages as PNG files next to it.
 */
@Test
public void testPdf() throws IOException {
    String filePath = "/Users/apple/Desktop/学习书籍/Docker从入门到实践.pdf";
    int numberOfPages = PdfUtil.getNumberOfPages(filePath);
    System.out.println("该pdf总页数为:" + numberOfPages);
    // Render the first three pages (or every page, if the PDF has fewer than three).
    for (int i = 0; i < 3 && i < numberOfPages; i++) {
        File target = new File("/Users/apple/Desktop/学习书籍/Docker从入门到实践" + i + ".png");
        // PDFToImg does not close the stream it is given, so close it here.
        try (FileOutputStream out = new FileOutputStream(target)) {
            PdfUtil.PDFToImg(out, filePath, i, "PNG");
        }
    }
}
控制台打印:
该pdf总页数为:370
对应的文件夹下生成以下图片: