Java 提取 PDF 图片(PDFBox)— Extract PDF document images

使用 Apache PDFBox(2.0.x)遍历 PDF 每一页的资源,提取其中的图片并保存为独立文件。

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.UUID;
import java.util.function.Consumer;

import javax.imageio.ImageIO;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
/**
 * Extracts the images referenced by the pages of a PDF document using Apache
 * PDFBox and writes each image to its own file.
 *
 * <p>Only XObjects listed directly in a page's resource dictionary are
 * inspected; images nested inside other XObjects are reported as
 * "isOtherXObject" and are not descended into.
 *
 * @author FishingGo
 * @time 2018-04-11 10:50
 * @description Extract PDF document images (JDK 1.8)
 */
public class GetImageFromPdf {

	/** Directory images are written to when the caller does not supply one. */
	private static final String DEFAULT_OUTPUT_DIR = "E://pdf/pic";

	/** PDF opened by {@link #main(String[])} when no path argument is given. */
	private static final String DEFAULT_PDF_PATH = "E://1.pdf";

	/** Format used when an image cannot be written in its native format. */
	private static final String FALLBACK_FORMAT = "png";

	/**
	 * Returns the file {@code dir/realName}, creating the parent directories
	 * and an empty file first if the file does not exist yet.
	 *
	 * @param dir      parent directory of the file
	 * @param realName file name inside {@code dir}
	 * @return the (now existing) file
	 * @throws IOException if the parent directory or the file cannot be created
	 */
	public static File mkdirsFile(String dir, String realName) throws IOException {
		File file = new File(dir, realName);
		if (!file.exists()) {
			File parent = file.getParentFile();
			// mkdirs() returns false both on failure and when another thread or
			// process created the directory first, hence the isDirectory() re-check.
			if (parent != null && !parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
				throw new IOException("Could not create directory " + parent);
			}
			file.createNewFile();
		}
		return file;
	}

	/**
	 * Loads a PDF document from disk. The caller owns the returned document
	 * and must close it.
	 *
	 * @param path path of the PDF file
	 * @return the loaded document
	 * @throws InvalidPasswordException if the document is encrypted and cannot
	 *                                  be opened without a password
	 * @throws IOException              if the file cannot be read or parsed
	 */
	public static PDDocument loadPdf(String path) throws InvalidPasswordException, IOException {
		return PDDocument.load(new File(path));
	}

	/**
	 * Returns an iterator over the pages of the given document.
	 *
	 * @param pdDocument an open document
	 * @return iterator over the document's page tree
	 */
	public static Iterator<PDPage> loadPDPages(PDDocument pdDocument) {
		PDPageTree pageTree = pdDocument.getPages();
		return pageTree.iterator();
	}

	/**
	 * Extracts every image XObject of the given pages into the default output
	 * directory.
	 *
	 * @param iterator pages to scan; it is consumed completely
	 */
	public static void convertImage(Iterator<PDPage> iterator) {
		convertImage(iterator, DEFAULT_OUTPUT_DIR);
	}

	/**
	 * Extracts every image XObject of the given pages into {@code outputDir}.
	 * Each image is stored under a random UUID file name. A failure on one
	 * XObject is reported on stderr and does not stop the remaining ones from
	 * being processed.
	 *
	 * @param iterator  pages to scan; it is consumed completely
	 * @param outputDir directory the image files are written to
	 */
	public static void convertImage(Iterator<PDPage> iterator, String outputDir) {
		while (iterator.hasNext()) {
			PDPage pdPage = iterator.next();
			PDResources pdResources = pdPage.getResources();
			if (pdResources == null) {
				// A page without a resource dictionary has nothing to extract.
				continue;
			}
			for (COSName name : pdResources.getXObjectNames()) {
				try {
					System.out.println("###########################################");
					System.out.println();
					if (pdResources.isImageXObject(name)) {
						System.out.println("COSName " + name.getName() + " isImageXObject");
						PDXObject pdXObject = pdResources.getXObject(name);
						PDImageXObject pdImageXObject = (PDImageXObject) pdXObject;
						String suffix = pdImageXObject.getSuffix();
						System.out.println("Height:" + pdImageXObject.getHeight() + "Width:" + pdImageXObject.getWidth() + "Suffix:" + suffix);
						writeImage(pdImageXObject.getImage(), suffix, outputDir);
					} else {
						System.out.println("COSName " + name.getName() + " isOtherXObject");
					}
					System.out.println();
					System.out.println("###########################################");
				} catch (IOException e) {
					// Best effort: report the failing XObject and carry on with the next one.
					System.err.println("Failed to extract XObject " + name.getName() + ": " + e.getMessage());
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Writes one image into {@code outputDir}, preferring the image's native
	 * format and falling back to PNG when that format cannot be used.
	 *
	 * @param image     decoded image to store
	 * @param suffix    native suffix reported for the image; may be null
	 * @param outputDir directory the file is written to
	 * @return the file that was written
	 * @throws IOException if no ImageIO writer could encode the image
	 */
	private static File writeImage(BufferedImage image, String suffix, String outputDir) throws IOException {
		String format = chooseFormat(image, suffix);
		File target = mkdirsFile(outputDir, UUID.randomUUID().toString() + "." + format);
		if (ImageIO.write(image, format, target)) {
			return target;
		}
		// ImageIO.write returns false when no writer accepts the image; remove the
		// empty file that mkdirsFile created so no zero-byte output is left behind.
		target.delete();
		if (!FALLBACK_FORMAT.equals(format)) {
			File fallback = mkdirsFile(outputDir, UUID.randomUUID().toString() + "." + FALLBACK_FORMAT);
			if (ImageIO.write(image, FALLBACK_FORMAT, fallback)) {
				return fallback;
			}
			fallback.delete();
		}
		throw new IOException("No ImageIO writer could encode the image as " + format);
	}

	/**
	 * Picks the output format for an image: the native suffix when ImageIO has
	 * a writer for it and it can represent the image, otherwise PNG.
	 */
	private static String chooseFormat(BufferedImage image, String suffix) {
		if (suffix == null || !ImageIO.getImageWritersByFormatName(suffix).hasNext()) {
			return FALLBACK_FORMAT;
		}
		boolean jpeg = "jpg".equalsIgnoreCase(suffix) || "jpeg".equalsIgnoreCase(suffix);
		if (jpeg && image.getColorModel().hasAlpha()) {
			// JPEG has no alpha channel; PNG keeps the transparency intact.
			return FALLBACK_FORMAT;
		}
		return suffix;
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args optional: {@code args[0]} is the PDF path and {@code args[1]}
	 *             the output directory; built-in defaults are used when omitted
	 */
	public static void main(String[] args) {
		String pdfPath = args.length > 0 ? args[0] : DEFAULT_PDF_PATH;
		String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;
		// try-with-resources guarantees the document is closed after extraction.
		try (PDDocument document = loadPdf(pdfPath)) {
			convertImage(loadPDPages(document), outputDir);
		} catch (InvalidPasswordException e) {
			System.err.println("Cannot open " + pdfPath + ": the document is password protected");
			e.printStackTrace();
		} catch (IOException e) {
			System.err.println("Failed to process " + pdfPath + ": " + e.getMessage());
			e.printStackTrace();
		}
	}
}

 

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.8</version>
</dependency>

 

 

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值