项目中需要将pdf文件转换成image图片。开始时使用pdfbox开源库进行转换,但是转换出来的图片中有部分内容出现乱码。下面是使用pdfBox将pdf转换成图片的代码示例。
// PDFBox example: render every page of a PDF to JPEG files.
// The document is declared outside the try block so that it can be closed
// in the finally clause even when loading or rendering fails.
PDDocument document = null;
try {
    String password = null;
    int startPage = 1;
    String imageType = "jpg";
    // Only the absolute path of this file is handed to the writer below.
    File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");
    document = PDDocument.load(pdfFile);
    // The original snippet assigned endPage without declaring it.
    int endPage = document.getPageCount();
    PDFImageWriter imageWriter = new PDFImageWriter();
    imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
} catch (IOException e) {
    e.printStackTrace();
} finally {
    if (document != null) {
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
比较了其他的开源库之后,准备采用jpedal。但是jpedal的资料非常少,除了官方网站外,即使是英文资料也很少。而且官方提供的代码示例中的一些方法在lgpl授权的
jpedal的代码库中不存在。下面是收集到的一些资料
1、jpedal文档:http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html
2、简单调用示例:http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
3、lgpl授权的jpedal库的下载地址:http://sourceforge.net/projects/jpedal/
4、转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html
5、高清图片转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html
于是稍微修改了官方的转换示例,下面是经过测试可以使用的转换代码
import cn.com.pujiConvert.util.Common;
import com.sun.imageio.plugins.jpeg.JPEGImageWriter;
import org.jpedal.*;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.Options;
import org.jpedal.fonts.FontMappings;
import org.jpedal.objects.PdfFileInformation;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Element;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Iterator;
public class ConvertPagesToImages{
/**
* show if image transparent
*/
boolean isTransparent=false;
/**output where we put files */
private String user_dir = System.getProperty("user.dir");
/**use 96 dpi as default so pages correct size (72 will be smaller) */
private float pageScaling =1.33f;
/**flag to show if we print messages */
public static boolean outputMessages = false;
String output_dir=null;
/**correct separator for OS */
String separator = System.getProperty("file.separator");
/**the decoder object which decodes the pdf and returns a data object */
PdfDecoder decode_pdf = null;
//type of image to save thumbnails
private String format = "png";
/** holding all creators that produce OCR pdf's ocr*/
private String[] ocr = {"TeleForm"};
/**scaling to use - default is 100 percent */
private int scaling=100;
/**file password or null */
private String password=null;
//only used if between 0 and 1
private float JPEGcompression=-1f;
private int pageCount = 0;
public ConvertPagesToImages() {
}
public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){
/*缩小比率*/
this.scaling = scaling;
/*图片格式*/
this.format = format;
/*输出目录*/
this.output_dir = output_dir;
/*pdf密码*/
this.password = password;
/*输出图片数*/
this.pageCount = pageCount;
/*判断文件是否存在*/
File pdf_file = new File(file_name);
if (!pdf_file.exists()) {
System.out.println("File " + pdf_file + " not found");
System.out.println("May need full path");
return;
}
extraction(file_name, output_dir);
}
private void extraction(String file_name, String output_dir) {
this.output_dir=output_dir;
if (!user_dir.endsWith(separator)){
user_dir = user_dir + separator;
}
if (file_name.toLowerCase().endsWith(".pdf")) {
if(output_dir==null){
output_dir=user_dir + "thumbnails" + separator;
}
decodeFile(file_name,output_dir);
} else {
String[] files = null;
File inputFiles;
if (!file_name.endsWith(separator)){
file_name = file_name + separator;
}
try {
inputFiles = new File(file_name);
if (!inputFiles.isDirectory()) {
System.err.println(file_name + " is not a directory. Exiting program");
}else{
files = inputFiles.list();
}
} catch (Exception ee) {
LogWriter.writeLog("Exception trying to access file " + ee.getMessage());
}
if(files!=null){
for (String file : files) {
if (file.toLowerCase().endsWith(".pdf")) {
if (outputMessages){
System.out.println(file_name + file);
}
decodeFile(file_name + file, output_dir);
}
}
}
}
if(outputMessages){
System.out.println("Thumbnails created");
}
}
/**
* routine to decode a file
*/
private void decodeFile(String file_name,String output_dir) {
String name = "demo"; //set a default just in case
int pointer = file_name.lastIndexOf(separator);
if(pointer==-1){
pointer = file_name.lastIndexOf('/');
}
if (pointer != -1){
name = file_name.substring(pointer + 1, file_name.length() - 4);
}else if((file_name.toLowerCase().endsWith(".pdf"))){
name=file_name.substring(0,file_name.length()-4);
}
//fix for odd files on Linux created when you view pages
if(name.startsWith(".")){
return;
}
//create output dir for images
if(output_dir==null){
output_dir = user_dir + "thumbnails" + separator ;
}
//PdfDecoder returns a PdfException if there is a problem
try {
if(decode_pdf==null){
decode_pdf = new PdfDecoder(true);
}
/**optional JAI code for faster rendering*/
org.jpedal.external.ImageHandler myExampleImageHandler=new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();
decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler);
//mappings for non-embedded fonts to use
FontMappings.setFontReplacements();
//true as we are rendering page
decode_pdf.setExtractionMode(0, pageScaling);
//don't bother to extract text and images
/**
* open the file (and read metadata including pages in file)
*/
if (outputMessages){
System.out.println("Opening file :" + file_name);
}
if(password != null && password != ""){
decode_pdf.openPdfFile(file_name,password);
}else{
decode_pdf.openPdfFile(file_name);
}
} catch (Exception e) {
System.err.println("8.Exception " + e + " in pdf code in "+file_name);
}
/**
* extract data from pdf (if allowed).
*/
if(decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()){
throw new RuntimeException("Wrong password password used=>"+password+ '<');
}else if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied())) && (!decode_pdf.isExtractionAllowed())) {
throw new RuntimeException("Extraction not allowed");
} else {
extractPageAsImage(file_name, output_dir, name, isTransparent);
}
/**close the pdf file */
decode_pdf.closePdfFile();
}
private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) {
//create a directory if it doesn't exist
File output_path = new File(output_dir);
if (!output_path.exists()){
output_path.mkdirs();
}
boolean isSingleOutputFile=false;
boolean compressTiffs = false;
String rawJPEGComp = null;
String jpgFlag = "96";
//page range
int start = 1, end = decode_pdf.getPageCount();
end = (pageCount == 0) ? end : pageCount;
if (outputMessages){
System.out.println("Thumbnails will be in " + output_dir);
}
try {
BufferedImage[] multiPages = new BufferedImage[1 + (end - start)];
for (int page = start; page < end + 1; page++){
getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end,multiPages, page);
}
} catch (Exception e) {
decode_pdf.closePdfFile();
throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name);
}
}
private void getPage(
String output_dir,
String name,
boolean isTransparent,
boolean isSingleOutputFile,
String rawJPEGComp,
String jpgFlag,
boolean compressTiffs,
int start,
int end,
BufferedImage[] multiPages,
int page
) throws PdfException, IOException, FileNotFoundException {
if (outputMessages ){
System.out.println("Page " + page);
}
/**
* 补0操作
*/
String pageAsString = String.valueOf(page);
String maxPageSize = String.valueOf(end);
int padding = maxPageSize.length()-pageAsString.length();
for(int ii = 0; ii < padding; ii++){
pageAsString = '0' + pageAsString;
}
String image_name;
if(isSingleOutputFile){
image_name =name;
}else{
image_name =name+"_page_" + pageAsString;
}
/**
* get PRODUCER and if OCR disable text printing
*/
PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData();
String[] values=currentFileInformation.getFieldValues();
String[] fields=PdfFileInformation.getFieldNames();
for(int i=0;i<fields.length;i++){
if(fields[i].equals("Creator")){
for (String anOcr : ocr) {
if (values[i].equals(anOcr)) {
decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES);
}
}
}
}
BufferedImage image_to_save;
if(!isTransparent){
image_to_save=decode_pdf.getPageAsImage(page);
}else{
//use this if you want a transparent image
image_to_save =decode_pdf.getPageAsTransparentImage(page);
//java adds odd tint if you save this as JPEG which does not have transparency
// so put as RGB on white background
// (or save as PNG or TIFF which has transparency)
// or just call decode_pdf.getPageAsImage(page)
if(image_to_save!=null && format.toLowerCase().startsWith("jp")){
BufferedImage rawVersion=image_to_save;
int w=rawVersion.getWidth(), h=rawVersion.getHeight();
//blank canvas
image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);
//
Graphics2D g2 = image_to_save.createGraphics();
//white background
g2.setPaint(Color.WHITE);
g2.fillRect(0,0,w,h);
//paint on image
g2.drawImage(rawVersion, 0, 0,null);
}
}
/*if just gray we can reduce memory usage by converting image to Grayscale
@SuppressWarnings("rawtypes")
Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES);
int nextID;
boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove
while(colorspacesUsed!=null && colorspacesUsed.hasNext()){
nextID= (Integer) (colorspacesUsed.next());
if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){
isGrayOnly=false;
}
}
//draw onto GRAY image to reduce colour depth
if(isGrayOnly){
BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);
image_to_save = image_to_save2;
}
//put image in array if multi-images
if(isSingleOutputFile){
multiPages[page-start] = image_to_save;
}
if (image_to_save != null) {
/**BufferedImage does not support any dpi concept. A higher dpi can be created
* using JAI to convert to a higher dpi image*/
//shrink the page to 50% with graphics2D transformation
//- add your own parameters as needed
//you may want to replace null with a hints object if you
//want to fine tune quality.
/** example 1 biliniear scaling
AffineTransform scale = new AffineTransform();
scale.scale(.5, .5); //50% as a decimal
AffineTransformOp scalingOp =new AffineTransformOp(scale, null);
image_to_save =scalingOp.filter(image_to_save, null);
*/
/** example 2 bicubic scaling - better quality but slower
to preserve aspect ratio set newWidth or newHeight to -1*/
/**allow user to specify maximum dimension for thumbnail*/
int maxDimension = -1;
if(scaling!=100 || maxDimension != -1){
int newWidth=image_to_save.getWidth()*scaling/100;
int newHeight=image_to_save.getHeight()*scaling/100;
Image scaledImage;
if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){
if(newWidth > newHeight){
newWidth = maxDimension;
scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
} else {
newHeight = maxDimension;
scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);
}
} else {
scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
}
if(format.toLowerCase().startsWith("jp")){
image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);
}else{
image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);
}
Graphics2D g2 = image_to_save.createGraphics();
g2.drawImage(scaledImage, 0, 0,null);
}
if (format.startsWith("jp")) {
saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' + format)));
} else {
//save image
decode_pdf.getObjectStore().saveStoredImage(
output_dir + pageAsString + image_name,
image_to_save,
true,
false,
format);
}
}
//flush images in case we do more than 1 page so only contains
//images from current page
decode_pdf.flushObjectValues(true);
}
private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {
JPEGImageWriter imageWriter = (JPEGImageWriter) ImageIO.getImageWritersBySuffix("jpeg").next();
ImageOutputStream ios = ImageIO.createImageOutputStream(fos);
imageWriter.setOutput(ios);
IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null);
if (Common.isInteger(jpgFlag)){
int dpi = 96;
try {
dpi = Integer.parseInt(jpgFlag);
} catch (Exception e) {
e.printStackTrace();
}
Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0");
Element jfif = (Element)tree.getElementsByTagName("app0JFIF").item(0);
jfif.setAttribute("Xdensity", Integer.toString(dpi));
jfif.setAttribute("Ydensity", Integer.toString(dpi));
}
JPEGImageWriteParam jpegParams = (JPEGImageWriteParam) imageWriter.getDefaultWriteParam();
if(JPEGcompression>=0 && JPEGcompression<=1f){
jpegParams.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT);
jpegParams.setCompressionQuality(JPEGcompression);
}
imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);
ios.close();
imageWriter.dispose();
}
public static void main(String[] args) {
long start=System.currentTimeMillis();
String pdfPath = "E:\\upload\\pdf\\20140424\\Servlet.pdf";
int scaling = -1;
String format = "jpg";
String output_dir = "E:\\upload\\pdf\\20140424\\jpg\\";
String password = null;
int pageCount = 10;
ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages();
convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount);
System.out.println("花费时间为="+(System.currentTimeMillis()-start)/1000 + "秒");
}
}
功能说明:
1、支持对文件夹下的所有pdf转换成图片,同时也支持对单个pdf进行转换操作。
2、支持转换成jpg,jpeg,tiff,tif,png格式的图片
3、支持指定转换的图片数。
4、支持指定图片的存储位置
传入参数说明
1、file_name pdf文件路径,或者包含pdf文件的文件夹路径
2、format 图片格式 (支持jpg,jpeg,tiff,tif,png) ,传参时不能带有点号
3、scaling 图片缩放比率,取值从1到100(100 = 全尺寸);也支持设置为-1,此时按原始尺寸输出,保持高质量
4、output_dir 输出路径,输出路径为绝对路径
5、password 文件密码,若没有密码则传入null值
6、pageCount 需要转换的页数,传入0表示转换全部页面