利用jpedal进行pdf转换成jpeg,jpg,png,tiff,tif等格式的图片

最新推荐文章于 2018-06-12 14:53:09 发布

u010452891

最新推荐文章于 2018-06-12 14:53:09 发布

阅读量2.6k

点赞数

分类专栏：综合文章标签： jpeal

综合专栏收录该内容

14 篇文章

订阅专栏

项目中运用到pdf文件转换成image图片,开始时使用pdfbox开源库进行图片转换,但是转换出来的文件中含有部分乱码的情况.下面是pdfBox 的pdf转换图片的代码示例.

// PDFBox example: render every page of one PDF into image files.
try {
    String password = null;
    int startPage = 1;
    String imageType = "jpg";
    File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");

    PDDocument document = PDDocument.load(pdfFile);
    try {
        // endPage must be declared here; the last page is the document's page count
        int endPage = document.getPageCount();
        PDFImageWriter imageWriter = new PDFImageWriter();
        imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
    } finally {
        // release the document even when rendering fails
        document.close();
    }
} catch (IOException e) {
    e.printStackTrace();
}

比较了其他的开源库之后,准备采用jpedal。但是jpedal的资料非常少，除了官方网站外，即使是英文资料也很少。而且官方提供的代码示例中的一些方法在lgpl授权的
jpedal的代码库中不存在。下面是收集到的一些资料

1、jpedal文档：http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html

2、简单调用示例：http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
3、lgpl授权的jpedal库的下载地址：http://sourceforge.net/projects/jpedal/
4、转换示例地址：http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html

5、高清图片转换示例地址：http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html

于是稍微修改了官方的转换示例，下面是经过测试可以使用的转换代码

import cn.com.pujiConvert.util.Common;

import com.sun.imageio.plugins.jpeg.JPEGImageWriter;
import org.jpedal.*;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.Options;
import org.jpedal.fonts.FontMappings;
import org.jpedal.objects.PdfFileInformation;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Element;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Iterator;
  
public class ConvertPagesToImages{  
      
    /**
     * When true, pages are fetched with getPageAsTransparentImage instead of
     * getPageAsImage (and flattened onto white for JPEG output).
     */
    boolean isTransparent=false;

    /**
     * Working directory taken from the "user.dir" system property; used as the
     * parent of the default "thumbnails" output directory.
     */
    private String user_dir = System.getProperty("user.dir");

    /**
     * Scaling factor handed to PdfDecoder.setExtractionMode.
     * use 96 dpi as default so pages correct size (72 will be smaller)
     */
    private float pageScaling =1.33f;

    /** flag to show if we print progress messages to System.out */
    public static boolean outputMessages = false;

    /** Output directory supplied by the caller; null until init/extraction sets it. */
    String output_dir=null;

    /** correct separator for OS, read from the "file.separator" system property */
    String separator = System.getProperty("file.separator");

    /** the decoder object which decodes the pdf; created lazily on the first decodeFile call */
    PdfDecoder decode_pdf = null;

    /** Image format (also used as the file extension) of the generated images. */
    private String format = "png";

    /**
     * "Creator" metadata values of tools that produce OCR PDFs; when a document's
     * Creator matches, the decoder is switched to PdfDecoder.RENDERIMAGES.
     */
    private String[] ocr = {"TeleForm"};

    /** scaling to use, as a percentage of the rendered page size - default is 100 percent */
    private int scaling=100;

    /** file password or null */
    private String password=null;

    /** JPEG compression quality; only used if between 0 and 1 */
    private float JPEGcompression=-1f;

    /** Number of the last page to convert; 0 means every page of the document. */
    private int pageCount = 0;

    /** Creates an unconfigured converter; all settings are supplied through init. */
    public ConvertPagesToImages() {

    }
      
    /**
     * Stores the conversion settings and starts converting the given PDF file or directory.
     *
     * @param file_name  path of a PDF file, or of a directory containing PDF files
     * @param scaling    size of the generated images as a percentage of the rendered page
     * @param format     image format / file extension, without a leading dot
     * @param output_dir directory that receives the images
     * @param password   password of the PDF, or null
     * @param pageCount  number of the last page to convert; 0 converts every page
     */
    public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){
        // Settings are stored before the existence check, so they are kept
        // even when the input path turns out to be missing.
        this.pageCount = pageCount;
        this.password = password;
        this.output_dir = output_dir;
        this.format = format;
        this.scaling = scaling;

        final File source = new File(file_name);
        if (source.exists()) {
            extraction(file_name, output_dir);
        } else {
            System.out.println("File " + source + " not found");
            System.out.println("May need full path");
        }
    }
      
    /**
     * Converts either a single PDF (when file_name ends in ".pdf") or every
     * PDF found directly inside the directory named by file_name.
     *
     * @param file_name  PDF file path or directory path
     * @param output_dir directory for the images; null selects user_dir + "thumbnails"
     */
    private void extraction(String file_name, String output_dir) {
        this.output_dir = output_dir;

        // make sure the working directory can be used as a path prefix
        if (!user_dir.endsWith(separator)) {
            user_dir += separator;
        }

        final boolean singlePdf = file_name.toLowerCase().endsWith(".pdf");

        if (singlePdf) {
            String target = (output_dir != null) ? output_dir : user_dir + "thumbnails" + separator;
            decodeFile(file_name, target);
        } else {
            // treat the argument as a directory and normalise its trailing separator
            final String dirPath = file_name.endsWith(separator) ? file_name : file_name + separator;
            String[] entries = null;

            try {
                File dir = new File(dirPath);
                if (dir.isDirectory()) {
                    entries = dir.list();
                } else {
                    System.err.println(dirPath + " is not a directory. Exiting program");
                }
            } catch (Exception ee) {
                LogWriter.writeLog("Exception trying to access file " + ee.getMessage());
            }

            if (entries != null) {
                for (int i = 0; i < entries.length; i++) {
                    final String entry = entries[i];

                    // only PDF files are converted
                    if (!entry.toLowerCase().endsWith(".pdf")) {
                        continue;
                    }

                    if (outputMessages) {
                        System.out.println(dirPath + entry);
                    }

                    decodeFile(dirPath + entry, output_dir);
                }
            }
        }

        if (outputMessages) {
            System.out.println("Thumbnails created");
        }
    }
      
    /**
     * Opens one PDF file, checks that it may be rendered and converts its pages to images.
     *
     * If the file cannot be opened, the problem is reported on System.err and the
     * file is skipped, so a directory run can continue with the remaining files.
     *
     * @param file_name  path of the PDF file; callers in this class only pass names ending in ".pdf"
     * @param output_dir directory that receives the images, or null for user_dir + "thumbnails"
     * @throws RuntimeException if the password is rejected or extraction is not permitted
     */
    private void decodeFile(String file_name,String output_dir) {
        String name = "demo"; //set a default just in case

        // use whichever separator occurs last, so paths mixing '\' and '/' still
        // yield the bare file name
        int pointer = Math.max(file_name.lastIndexOf(separator), file_name.lastIndexOf('/'));

        if (pointer != -1) {
            name = file_name.substring(pointer + 1, file_name.length() - 4);
        } else if (file_name.toLowerCase().endsWith(".pdf")) {
            name = file_name.substring(0, file_name.length() - 4);
        }

        //fix for odd files on Linux created when you view pages
        if (name.startsWith(".")) {
            return;
        }

        //create output dir for images
        if (output_dir == null) {
            output_dir = user_dir + "thumbnails" + separator;
        }

        //PdfDecoder returns a PdfException if there is a problem
        try {
            if (decode_pdf == null) {
                decode_pdf = new PdfDecoder(true);
            }

            // external image handler taken from the vendor sample
            org.jpedal.external.ImageHandler myExampleImageHandler=new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();
            decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler);

            //mappings for non-embedded fonts to use
            FontMappings.setFontReplacements();

            //we only render pages - don't bother to extract text and images
            decode_pdf.setExtractionMode(0, pageScaling);

            if (outputMessages) {
                System.out.println("Opening file :" + file_name);
            }

            // compare by content: != on strings only compares references
            if (password != null && !password.isEmpty()) {
                decode_pdf.openPdfFile(file_name, password);
            } else {
                decode_pdf.openPdfFile(file_name);
            }

        } catch (Exception e) {
            System.err.println("8.Exception " + e + " in pdf code in "+file_name);

            // nothing usable was opened: release what we can and skip this file
            // instead of continuing with a null or unopened decoder
            if (decode_pdf != null) {
                decode_pdf.closePdfFile();
            }
            return;
        }

        /**
         * extract data from pdf (if allowed).
         */
        if (decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()) {
            decode_pdf.closePdfFile();
            // the password itself is deliberately kept out of the message
            throw new RuntimeException("Wrong password supplied for " + file_name);
        }

        if (decode_pdf.isEncrypted() && !decode_pdf.isPasswordSupplied() && !decode_pdf.isExtractionAllowed()) {
            decode_pdf.closePdfFile();
            throw new RuntimeException("Extraction not allowed");
        }

        // extractPageAsImage closes the file itself before rethrowing a failure
        extractPageAsImage(file_name, output_dir, name, isTransparent);

        /**close the pdf file */
        decode_pdf.closePdfFile();
    }
      
    /**
     * Renders pages 1..N of the currently opened PDF into image files, where N is
     * pageCount limited to the real number of pages (pageCount of 0 or less means all pages).
     *
     * @param file_name     path of the PDF, used only in messages
     * @param output_dir    directory that receives the images; created when missing
     * @param name          base name for the generated image files
     * @param isTransparent whether pages are rendered with a transparent background
     * @throws RuntimeException wrapping any failure; the PDF is closed before it is thrown
     */
    private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) {
        boolean isSingleOutputFile = false;
        boolean compressTiffs = false;
        String rawJPEGComp = null;
        String jpgFlag = "96";

        if (outputMessages) {
            System.out.println("Thumbnails will be in  " + output_dir);
        }

        try {
            //create a directory if it doesn't exist
            File output_path = new File(output_dir);
            if (!output_path.exists() && !output_path.mkdirs() && !output_path.isDirectory()) {
                throw new IOException("Unable to create output directory " + output_dir);
            }

            //page range - never ask the decoder for pages the document does not have
            int start = 1;
            int total = decode_pdf.getPageCount();
            int end = (pageCount <= 0) ? total : Math.min(pageCount, total);

            BufferedImage[] multiPages = new BufferedImage[Math.max(0, 1 + (end - start))];

            for (int page = start; page <= end; page++) {
                getPage(output_dir, name, isTransparent, isSingleOutputFile, rawJPEGComp, jpgFlag, compressTiffs, start, end, multiPages, page);
            }
        } catch (Exception e) {
            decode_pdf.closePdfFile();
            // keep the original exception as the cause so the stack trace is not lost
            throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name, e);
        }
    }
      
    private void getPage(  
            String output_dir,   
            String name,   
            boolean isTransparent,  
            boolean isSingleOutputFile,   
            String rawJPEGComp,   
            String jpgFlag,  
            boolean compressTiffs,   
            int start,   
            int end,  
            BufferedImage[] multiPages,   
            int page  
    ) throws PdfException, IOException, FileNotFoundException {  
        if (outputMessages ){  
            System.out.println("Page " + page);  
        }  
          
        /** 
         * 补0操作 
         */  
        String pageAsString = String.valueOf(page);  
        String maxPageSize  = String.valueOf(end);  
        int padding         = maxPageSize.length()-pageAsString.length();  
              
        for(int ii = 0; ii < padding; ii++){  
            pageAsString = '0' + pageAsString;  
        }  
          
        String image_name;  
        if(isSingleOutputFile){  
            image_name =name;  
        }else{  
            image_name =name+"_page_" + pageAsString;  
        }  
          
        /** 
         * get PRODUCER and if OCR disable text printing 
         */  
        PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData();  
          
        String[] values=currentFileInformation.getFieldValues();  
        String[] fields=PdfFileInformation.getFieldNames();  
              
        for(int i=0;i<fields.length;i++){  
            if(fields[i].equals("Creator")){        
                for (String anOcr : ocr) {    
                    if (values[i].equals(anOcr)) {                             
                        decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES);                              
                    }  
                }  
            }  
        }  
              
        BufferedImage image_to_save;  
        if(!isTransparent){  
            image_to_save=decode_pdf.getPageAsImage(page);  
        }else{   
            //use this if you want a transparent image   
            image_to_save =decode_pdf.getPageAsTransparentImage(page);  
              
            //java adds odd tint if you save this as JPEG which does not have transparency  
            // so put as RGB on white background  
            // (or save as PNG or TIFF which has transparency)  
            // or just call decode_pdf.getPageAsImage(page)  
            if(image_to_save!=null && format.toLowerCase().startsWith("jp")){  
                  
                BufferedImage rawVersion=image_to_save;  
                  
                int w=rawVersion.getWidth(), h=rawVersion.getHeight();  
                //blank canvas  
                image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);  
                  
                //  
                Graphics2D g2 = image_to_save.createGraphics();  
                //white background  
                g2.setPaint(Color.WHITE);  
                g2.fillRect(0,0,w,h);  
                //paint on image  
                g2.drawImage(rawVersion, 0, 0,null);  
            }  
        }  
          
        /*if just gray we can reduce memory usage by converting image to Grayscale 
 
         
        @SuppressWarnings("rawtypes") 
        Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES); 
         
        int nextID; 
        boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove 
        while(colorspacesUsed!=null && colorspacesUsed.hasNext()){ 
            nextID= (Integer) (colorspacesUsed.next()); 
             
            if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){ 
                isGrayOnly=false; 
            } 
        } 
         
        //draw onto GRAY image to reduce colour depth 
        if(isGrayOnly){ 
            BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY); 
            image_to_save2.getGraphics().drawImage(image_to_save,0,0,null); 
            image_to_save = image_to_save2; 
        } 
         
        //put image in array if multi-images 
        if(isSingleOutputFile){ 
            multiPages[page-start] = image_to_save; 
        } 
         
        if (image_to_save != null) { 
             
            /**BufferedImage does not support any dpi concept. A higher dpi can be created 
             * using JAI to convert to a higher dpi image*/  
              
            //shrink the page to 50% with graphics2D transformation  
            //- add your own parameters as needed  
            //you may want to replace null with a hints object if you  
            //want to fine tune quality.  
              
            /** example 1 biliniear scaling 
             AffineTransform scale = new AffineTransform(); 
             scale.scale(.5, .5); //50% as a decimal 
             AffineTransformOp scalingOp =new AffineTransformOp(scale, null); 
             image_to_save =scalingOp.filter(image_to_save, null); 
 
             */  
              
            /** example 2 bicubic scaling - better quality but slower 
             to preserve aspect ratio set newWidth or newHeight to -1*/  
              
            /**allow user to specify maximum dimension for thumbnail*/  
            int maxDimension = -1;  
              
            if(scaling!=100 || maxDimension != -1){  
                int newWidth=image_to_save.getWidth()*scaling/100;  
                int newHeight=image_to_save.getHeight()*scaling/100;  
                  
                Image scaledImage;  
                if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){  
                    if(newWidth > newHeight){  
                        newWidth = maxDimension;  
                        scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);  
                    } else {  
                        newHeight = maxDimension;  
                        scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);  
                    }  
                } else {  
                    scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);  
                }  
                  
                if(format.toLowerCase().startsWith("jp")){  
                    image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);  
                }else{  
                    image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);  
                }  
                  
                Graphics2D g2 = image_to_save.createGraphics();  
                  
                g2.drawImage(scaledImage, 0, 0,null);  
            }  
  
            if (format.startsWith("jp")) {  
                saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' + format)));                  
            } else {  
                //save image  
                decode_pdf.getObjectStore().saveStoredImage(  
                        output_dir + pageAsString + image_name,  
                        image_to_save,  
                        true,  
                        false,  
                        format);  
            }     
        }  
          
        //flush images in case we do more than 1 page so only contains  
        //images from current page  
        decode_pdf.flushObjectValues(true);              
    }  
      
    /**
     * Encodes an image as JPEG onto the given stream, optionally tagging it with a DPI value.
     *
     * The caller owns {@code fos}: this method flushes the encoded data into it
     * but does not close it.
     *
     * @param jpgFlag         DPI as text; applied to the JFIF header only when it is an integer
     *                        in the range 1..65535
     * @param image_to_save   image to encode
     * @param JPEGcompression compression quality; applied only when between 0 and 1 inclusive
     * @param fos             destination stream
     * @throws IOException if no JPEG writer is available or encoding fails
     */
    private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {
        Iterator<ImageWriter> writers = ImageIO.getImageWritersBySuffix("jpeg");
        if (!writers.hasNext()) {
            throw new IOException("No JPEG ImageWriter is registered with ImageIO");
        }
        // the public ImageWriter API is sufficient; no cast to the internal com.sun class
        ImageWriter imageWriter = writers.next();

        ImageOutputStream ios = null;
        try {
            ios = ImageIO.createImageOutputStream(fos);
            if (ios == null) {
                throw new IOException("Unable to create an ImageOutputStream for the JPEG output");
            }
            imageWriter.setOutput(ios);

            IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null);

            if (Common.isInteger(jpgFlag)) {

                int dpi = 96;

                try {
                    dpi = Integer.parseInt(jpgFlag);
                } catch (NumberFormatException e) {
                    // value does not fit an int: keep the 96 dpi default
                }

                final String jpegFormat = "javax_imageio_jpeg_image_1.0";
                Element tree = (Element) imageMetaData.getAsTree(jpegFormat);
                Element jfif = (Element) tree.getElementsByTagName("app0JFIF").item(0);

                // images without a JFIF segment (e.g. with alpha) cannot carry the density
                if (jfif != null && dpi >= 1 && dpi <= 65535) {
                    // resUnits 1 = dots per inch; without it the densities are only an aspect ratio
                    jfif.setAttribute("resUnits", "1");
                    jfif.setAttribute("Xdensity", Integer.toString(dpi));
                    jfif.setAttribute("Ydensity", Integer.toString(dpi));

                    // getAsTree returns a copy, so the edited tree must be written back
                    imageMetaData.setFromTree(jpegFormat, tree);
                }
            }

            ImageWriteParam jpegParams = imageWriter.getDefaultWriteParam();
            if (JPEGcompression >= 0 && JPEGcompression <= 1f) {
                jpegParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                jpegParams.setCompressionQuality(JPEGcompression);
            }

            imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);
        } finally {
            try {
                if (ios != null) {
                    ios.close();
                }
            } finally {
                imageWriter.dispose();
            }
        }
    }
      
     /**
      * Demo entry point: converts the first 10 pages of a fixed sample PDF to JPEG
      * files and prints the elapsed time in whole seconds.
      *
      * @param args ignored
      */
     public static void main(String[] args) {
         final long startedAt = System.currentTimeMillis();

         ConvertPagesToImages converter = new ConvertPagesToImages();
         converter.init(
                 "E:\\upload\\pdf\\20140424\\Servlet.pdf",   // PDF to convert
                 -1,                                          // scaling
                 "jpg",                                       // image format
                 "E:\\upload\\pdf\\20140424\\jpg\\",          // output directory
                 null,                                        // no password
                 10);                                         // last page to convert

         long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000;
         System.out.println("花费时间为=" + elapsedSeconds + "秒");
    }
}

功能说明：

1、支持对文件夹下的所有pdf转换成图片，同时也支持对单个pdf进行转换操作。

2、支持转换成jpg，jpeg，tiff，tif，png格式的图片

3、支持指定转换的图片数。

4、支持指定图片的存储位置

传入参数说明

1、pdfPath pdf文件绝对路径，可以是pdf所在的目录也可以是pdf文件路径
2、format 图片格式 (支持jpg,jpeg,tiff,png) ，传参时不能带有点号
3、scaling 图片缩放比率，取值从1到100(100 = 全尺寸)；也支持设置为-1，表示不进行缩放，按渲染出的原始尺寸输出以保持高质量
4、output_dir 输出路径，输出路径为绝对路径
5、password 文件密码若没有传入null值