// 依赖 pdfbox 提取 PDF 文件图片 (Extract the images embedded in a PDF file using Apache PDFBox)
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.UUID;
import java.util.function.Consumer;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
/**
 * Extracts the images embedded in a PDF document and writes each one to disk
 * as a separate file. Written against PDFBox 2.x and JDK 1.8.
 *
 * <p>Only XObjects listed directly in a page's resources are inspected; images
 * nested inside other XObjects are not visited.
 *
 * @author FishingGo
 * @time 2018-04-11 10:50
 */
public class GetImageFromPdf {

    /** PDF opened by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_PDF_PATH = "E://1.pdf";

    /** Directory used by {@link #convertImage(Iterator)} and by {@code main} when none is given. */
    private static final String DEFAULT_OUTPUT_DIR = "E://pdf/pic";

    /** Format used when the image's own suffix is unknown or cannot be written by ImageIO. */
    private static final String FALLBACK_FORMAT = "png";

    /**
     * Returns the file {@code dir/realName}, creating it (and any missing
     * parent directories) as an empty file if it does not exist yet.
     *
     * @param dir      parent directory; may be {@code null}, in which case
     *                 {@code realName} is resolved on its own
     * @param realName file name, relative to {@code dir}
     * @return the existing or newly created file
     * @throws IOException if the parent directory or the file cannot be created
     */
    public static File mkdirsFile(String dir, String realName) throws IOException {
        File file = new File(dir, realName);
        if (file.exists()) {
            return file;
        }
        File parent = file.getParentFile();
        // parent is null for a bare file name; mkdirs() also returns false when
        // another thread/process created the directory first, hence the isDirectory() check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Could not create directory " + parent);
        }
        file.createNewFile();
        return file;
    }

    /**
     * Loads a PDF document from the file system. The caller owns the returned
     * document and must close it.
     *
     * @param path path of the PDF file
     * @return the loaded document
     * @throws InvalidPasswordException if the document is encrypted and cannot be opened
     * @throws IOException              if the file cannot be read or parsed
     */
    public static PDDocument loadPdf(String path) throws InvalidPasswordException, IOException {
        return PDDocument.load(new File(path));
    }

    /**
     * Returns an iterator over the pages of a document.
     *
     * @param pdDocument an open document
     * @return iterator over the document's page tree
     */
    public static Iterator<PDPage> loadPDPages(PDDocument pdDocument) {
        PDPageTree pageTree = pdDocument.getPages();
        return pageTree.iterator();
    }

    /**
     * Extracts every image XObject of the remaining pages into the default
     * output directory ({@code E://pdf/pic}).
     *
     * @param iterator pages to process; consumed by this call
     */
    public static void convertImage(Iterator<PDPage> iterator) {
        convertImage(iterator, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Extracts every image XObject of the remaining pages into {@code outputDir}.
     * Each image is written under a random UUID file name. A failure on one
     * XObject is reported and does not stop the remaining ones.
     *
     * @param iterator  pages to process; consumed by this call
     * @param outputDir directory that receives the image files; created on demand
     */
    public static void convertImage(Iterator<PDPage> iterator, String outputDir) {
        while (iterator.hasNext()) {
            PDResources resources = iterator.next().getResources();
            if (resources == null) {
                // Defensive: a page without a resource dictionary has nothing to extract.
                continue;
            }
            for (COSName name : resources.getXObjectNames()) {
                processXObject(resources, name, outputDir);
            }
        }
    }

    /** Reports one XObject on stdout and, if it is an image, saves it to {@code outputDir}. */
    private static void processXObject(PDResources resources, COSName name, String outputDir) {
        try {
            System.out.println("###########################################");
            System.out.println();
            if (resources.isImageXObject(name)) {
                System.out.println("COSName " + name.getName() + " isImageXObject");
                PDXObject xObject = resources.getXObject(name);
                if (xObject instanceof PDImageXObject) {
                    saveImage((PDImageXObject) xObject, outputDir);
                }
            } else {
                System.out.println("COSName " + name.getName() + " isOtherXObject");
            }
            System.out.println();
            System.out.println("###########################################");
        } catch (IOException e) {
            // Best effort: report the failing object and carry on with the next one.
            System.err.println("Failed to extract XObject " + name.getName());
            e.printStackTrace();
        }
    }

    /**
     * Writes one image in its native format when ImageIO has a writer for it,
     * otherwise (or when that writer rejects the image) as PNG.
     */
    private static void saveImage(PDImageXObject imageXObject, String outputDir) throws IOException {
        String suffix = imageXObject.getSuffix();
        System.out.println("Height:" + imageXObject.getHeight() + "Width:" + imageXObject.getWidth() + "Suffix:" + suffix);
        BufferedImage image = imageXObject.getImage();
        String baseName = UUID.randomUUID().toString();

        // The null check comes first because ImageIO rejects a null format name
        // with IllegalArgumentException.
        boolean written = suffix != null
                && ImageIO.getImageWritersByFormatName(suffix).hasNext()
                && writeImage(image, suffix, outputDir, baseName);
        if (!written && !writeImage(image, FALLBACK_FORMAT, outputDir, baseName)) {
            throw new IOException("No ImageIO writer could encode the image (suffix: " + suffix + ")");
        }
    }

    /**
     * Writes {@code image} to {@code outputDir/baseName.format}.
     *
     * @return {@code false} if ImageIO found no writer able to encode the image;
     *         the target file is removed in that case so no empty file is left behind
     */
    private static boolean writeImage(BufferedImage image, String format, String outputDir, String baseName)
            throws IOException {
        File target = mkdirsFile(outputDir, baseName + "." + format);
        if (ImageIO.write(image, format, target)) {
            return true;
        }
        target.delete();
        return false;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the PDF path (default {@code E://1.pdf}),
     *             {@code args[1]} is the output directory (default {@code E://pdf/pic})
     */
    public static void main(String[] args) {
        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        String outputDir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_DIR;
        // try-with-resources closes the document even when extraction fails.
        // InvalidPasswordException is an IOException, so a single catch covers both.
        try (PDDocument document = loadPdf(pdfPath)) {
            convertImage(loadPDPages(document), outputDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/*
Maven dependency required by this class (add it to the <dependencies> section of pom.xml):
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.8</version>
</dependency>
*/