业务:pdf转图片问题(解决非标准pdf转图片空白问题)

业务说明:由于业务相关要求,对接第三方PDF文件转JPG之后上传服务器,同时还进行ocr识别。
看了一下CSDN上面的一些帖子,大部分都无法处理非标准的PDF文件,比如用打印机扫描的PDF文件可能是歪着的,就会转换图片失败。
前提:使用Ghostscript进行转换。需要先安装Ghostscript(gs),然后把可执行文件的路径(例如 D:/gs/bin/gs9.23/bin/gswin32c)写入配置文件的 gs.path 配置项,代码通过 @Value("${gs.path}") 读取后调用。
依赖包:

	<!-- Only dependency declared in this post. NOTE(review): the imports listed below also reference
	     com.github.tobato.fastdfs, com.itextpdf, org.apache.commons.io, org.slf4j and org.springframework;
	     those artifacts are not declared here and presumably must be added separately - confirm. -->
	<dependency>
			<groupId>org.jodconverter</groupId>
			<artifactId>jodconverter-core</artifactId>
			<version>4.1.0</version>
		</dependency>

导入:

import com.github.tobato.fastdfs.domain.StorePath;
import com.github.tobato.fastdfs.service.FastFileStorageClient;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.CountDownLatch;

1、核心方法 pdf2Image。logParam 主要用于日志输出,caseOrderNo 在下面的示例代码中没有被使用,二者随便传值即可;id 供业务侧把图片上传到服务器时使用。

 /** Root folder (relative path) under which the temporary PDF file and the generated images are created. */
 private static final String GS_TEMP_FOLDER_NAME = "gstemp";

 /** Path of the Ghostscript executable, injected from the {@code gs.path} configuration property. */
 @Value("${gs.path}")
 private String gsPath;

/**
 * Converts an in-memory PDF into JPEG images by writing it to a temporary file and delegating
 * to {@code pdf2Image(File, String, String, String)}.
 *
 * <p>The original declaration was {@code void} while the body returned the delegate's result,
 * which does not compile; the method now returns that result. Callers that ignored the
 * (non-existent) return value are unaffected.
 *
 * @param pdfBytes    raw PDF content; must not be {@code null} or empty
 * @param logParam    free-form text appended to log messages and passed to {@code FileUtil.createFile}
 * @param caseOrderNo forwarded unchanged to the file-based overload
 * @param id          forwarded unchanged to the file-based overload
 * @return the value returned by the file-based overload
 * @throws Exception if the input is empty, the temporary file cannot be created or written,
 *                   or the file-based overload fails
 */
public List<UploadFileDto> pdf2Image(byte[] pdfBytes, String logParam, String caseOrderNo, String id) throws Exception {
    // Reject unusable input before anything is created on disk.
    if (pdfBytes == null || pdfBytes.length == 0) {
        throw new IllegalArgumentException("pdf字节流为空!");
    }

    // 1. Create a uniquely named temporary file: <root>/<uuid>/<uuid>.
    String path = GS_TEMP_FOLDER_NAME + File.separator + UUID.randomUUID() + File.separator + UUID.randomUUID();
    File pdfFile = FileUtil.createFile(path, logParam);
    if (pdfFile == null) {
        throw new Exception("创建临时目录失败!");
    }

    // 2. Write the bytes into the very file that is read back later. The original wrote to
    //    new File(path), which only works if FileUtil.createFile returns exactly that file.
    try {
        FileUtils.writeByteArrayToFile(pdfFile, pdfBytes);
    } catch (IOException e) {
        log.error("将pdf转成图片出现IOException. logParam = " + logParam, e);
        // The file-based overload never runs on this path, so remove the temporary folder here.
        FileUtil.deleteFileAndFolder(pdfFile.getParent());
        throw new Exception("将pdf转成图片出现异常!!", e);
    }

    // 3. Render the pages and hand the result back to the caller.
    return pdf2Image(pdfFile, logParam, caseOrderNo, id);
}
--------------------------------------------------------
/**
 * Renders every page of {@code pdfFile} to a JPEG by invoking the Ghostscript executable at
 * {@code gsPath} once per page, and returns the images in page order.
 *
 * <p>Pages are rendered sequentially on the calling thread. The folder containing
 * {@code pdfFile} is used for the intermediate images and is always deleted before this
 * method returns, whether it succeeds or fails.
 *
 * <p>Failure behaviour kept from the original implementation: a non-zero Ghostscript exit code
 * aborts the conversion and yields {@code null}; any other per-page error is logged and that
 * page is skipped.
 *
 * @param pdfFile     PDF file on disk; its parent folder is treated as a disposable work folder
 * @param logParam    free-form text appended to log messages
 * @param caseOrderNo not used by the code shown here
 * @param id          not used by the code shown here
 * @return one {@code UploadFileDto} per successfully rendered page, or {@code null} if
 *         Ghostscript exited with a non-zero code
 * @throws Exception if the PDF cannot be read, has no pages, or the thread is interrupted
 */
private List<UploadFileDto> pdf2Image(File pdfFile, String logParam, String caseOrderNo, String id) throws Exception {
    String imgPath = pdfFile.getParent();
    try {
        // 1. Read page count and first-page size, then release the PDF immediately so the
        //    open handle cannot block deletion of the work folder.
        int pageCount;
        float width = 0;
        float height = 0;
        PdfReader pdfReader = null;
        try (InputStream pdfStream = new FileInputStream(pdfFile)) {
            pdfReader = new PdfReader(pdfStream);
            pageCount = pdfReader.getNumberOfPages();
            if (pageCount > 0) {
                Rectangle firstPageSize = pdfReader.getPageSize(1);
                width = firstPageSize.getWidth();
                height = firstPageSize.getHeight();
            }
        } catch (Exception e) {
            log.error("读取pdf文件失败. logParam = " + logParam, e);
            throw new Exception("将pdf转成图片出现异常!", e);
        } finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
        if (pageCount == 0) {
            throw new Exception("将pdf转成图片出现异常!");
        }

        // The first page's size decides the resolution / down-scale switches for all pages.
        String[] scaling = chooseGsScaling(width, height);
        String resolution = scaling[0];
        String downScaleFactor = scaling[1];

        log.info("获取到pdf总页数为... pageCount = {}, logParam = {}", pageCount, logParam);

        // 2. Render page by page.
        List<UploadFileDto> imgList = new ArrayList<>(pageCount);
        for (int page = 1; page <= pageCount; page++) {
            File tempImage = new File(imgPath + File.separator + UUID.randomUUID().toString() + ".jpg");
            // The PDF path has to be the last argument.
            String[] gsArgs = {
                gsPath, "-dNOPAUSE", "-dBATCH", "-dSAFER", "-sDEVICE=jpeg", "-sDisplayHandle=0",
                resolution, downScaleFactor,
                "-dFirstPage=" + page,
                "-dLastPage=" + page,
                "-sOutputFile=" + tempImage.getAbsolutePath(),
                pdfFile.getAbsolutePath()
            };
            log.info("pdf2Image gsArgs is: {}", Arrays.toString(gsArgs));
            try {
                Process proc = new ProcessBuilder(gsArgs).redirectErrorStream(true).start();
                try {
                    // readLines blocks until the process closes its output, so one call drains
                    // everything; looping on isAlive() could spin once the stream hit EOF.
                    for (String line : IOUtils.readLines(proc.getInputStream())) {
                        log.info(line);
                    }
                    int exitValue = proc.waitFor();
                    if (exitValue != 0) {
                        log.error("pdf转为图片转换失败 gs进程返回错误码为: {}, logParam = {}", exitValue, logParam);
                        return null;
                    }
                } finally {
                    // No-op for a finished process; kills it if we are leaving because of an error.
                    proc.destroy();
                }

                UploadFileDto fileDto = new UploadFileDto();
                fileDto.setFileType(FileTypeEnum.JPG.getType());
                fileDto.setFileStream(FileUtils.readFileToByteArray(tempImage));
                fileDto.setSort(page);
                imgList.add(fileDto);

                // Project-specific handling of tempImage (e.g. upload) goes here.
            } catch (InterruptedException e) {
                // Restore the flag so callers can observe the interruption, then abort.
                Thread.currentThread().interrupt();
                log.error("pdf转为图片时,出现InterruptedException...logParam = " + logParam, e);
                throw new Exception("将pdf转成图片出现异常!", e);
            } catch (Exception e) {
                // Best effort: log and continue with the next page.
                log.error("pdf转为图片转换出现特殊异常...logParam = " + logParam, e);
            }
        }
        return imgList;
    } finally {
        // 3. Always remove the work folder, including on the null-return and exception paths.
        FileUtil.deleteFileAndFolder(imgPath);
    }
}

/**
 * Picks the Ghostscript resolution and down-scale switches for a page of the given size.
 *
 * @param width  page width as reported by {@code PdfReader.getPageSize}
 * @param height page height as reported by {@code PdfReader.getPageSize}
 * @return a two-element array: {@code [0]} the {@code -r...} switch, {@code [1]} the
 *         {@code -dDownScaleFactor=...} switch
 */
private static String[] chooseGsScaling(float width, float height) {
    if (width < 1487 && height < 2105) {
        return new String[] {"-r300", "-dDownScaleFactor=1"};
    }
    if (width < 2451 && height < 3508) {
        return new String[] {"-r300", "-dDownScaleFactor=2"};
    }
    if (width * 0.5 <= 2479 && height * 0.5 <= 3508) {
        return new String[] {"-r150", "-dDownScaleFactor=2"};
    }
    if (width * 0.4 <= 2479 && height * 0.4 <= 3508) {
        return new String[] {"-r120", "-dDownScaleFactor=3"};
    }
    return new String[] {"-r72", "-dDownScaleFactor=3"};
}

上述代码有些地方逐一解释比较麻烦,这里记录下来供大家使用。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值