PDFBox 升级到 2.0 以后,很多类的包路径变了,不少类的名称也变了。老版本中提取图片的方法可以按如下方式修改:
导包
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
获取方法
/**
 * Extracts the images embedded in a PDF document. This pulls out the stored
 * image objects; it does not render pages to images.
 *
 * @param document the document whose pages are scanned
 * @return the image XObjects gathered from every page, in page-iteration order
 * @throws IOException propagated from {@code getImagesFromResources}
 */
public static List<PDImageXObject> getImagesFromPDF(PDDocument document) throws IOException {
    List<PDImageXObject> collected = new ArrayList<PDImageXObject>();
    for (PDPage currentPage : document.getPages()) {
        // Each page contributes the images found in its own resources.
        PDResources pageResources = currentPage.getResources();
        List<PDImageXObject> pageImages = getImagesFromResources(pageResources);
        collected.addAll(pageImages);
    }
    return collected;
}
/**
 * Collects the image XObjects that are direct entries of a resource dictionary.
 * This pulls out the stored image objects; it does not render anything to an image.
 *
 * <p>Form XObjects are skipped, so images nested inside a form are not returned.
 *
 * @param resources the resources to scan; may be {@code null}, in which case an
 *                  empty list is returned
 * @return the image XObjects found; never {@code null}
 * @throws IOException propagated from {@code PDResources.getXObject}
 */
private static List<PDImageXObject> getImagesFromResources(PDResources resources) throws IOException {
    List<PDImageXObject> images = new ArrayList<PDImageXObject>();
    if (resources == null) {
        // PDPage.getResources() can return null when a page has no resource
        // dictionary; treat that as "no images" instead of failing with an NPE.
        return images;
    }
    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = resources.getXObject(xObjectName);
        // Only image XObjects are kept; form XObjects and any other kind are ignored.
        if (xObject instanceof PDImageXObject) {
            images.add((PDImageXObject) xObject);
        }
    }
    return images;
}
提取之后保存图片(document、count 和 targetFolder 需要事先定义):
// Save every extracted image under targetFolder as "<count>.<suffix>".
List<PDImageXObject> imgList = getImagesFromPDF(document);
for (PDImageXObject object : imgList) {
    BufferedImage bi = object.getImage();
    String fileFormat = object.getSuffix();
    if (fileFormat == null) {
        // getSuffix() can return null for stream filters it does not recognise;
        // ImageIO.write rejects a null format name, so fall back to PNG.
        fileFormat = "png";
    }
    count++;
    String name = count + "." + fileFormat; // image file name
    // targetFolder is concatenated as-is, so it must already end with a path separator.
    File outputfile = new File(targetFolder + name);
    boolean written = ImageIO.write(bi, fileFormat, outputfile);
    if (!written && !"png".equals(fileFormat)) {
        // ImageIO.write returns false when no writer handles this format/image
        // combination; retry as PNG rather than silently dropping the image.
        String fallbackName = count + ".png";
        outputfile = new File(targetFolder + fallbackName);
        written = ImageIO.write(bi, "png", outputfile);
    }
    if (!written) {
        throw new IOException("No ImageIO writer could save image to " + outputfile);
    }
}