文档展示:PDFRender 将PDF转换为图片 多线程处理 提高效率

上接 文档展示:PDFRender 将PDF转换为图片
[url]http://zhuyufufu.iteye.com/admin/blogs/2012236[/url]

本篇文章研究如何利用多线程技术提高PDF转图片的效率(减少用时)

对上一篇的例子加上用时统计:

// Time one call to convert and print the elapsed whole seconds.
final long startNanos = System.nanoTime();
PDFRenderTest.convert(inputPDFPath, outputFDir);
final long elapsedNanos = System.nanoTime() - startNanos;

System.out.println("耗时: " + elapsedNanos / 1000000000 + " 秒" );


重写代码为多线程,暂时一页PDF起一个线程

线程代码


package com.zas.pdfrender.test;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import com.sun.image.codec.jpeg.ImageFormatException;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGEncodeParam;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
import com.sun.pdfview.PDFPage;

/**
 * Renders one PDF page to an image file; intended to be run on its own thread.
 *
 * <p>The page is rasterized at the size of its bounding box, copied into an RGB
 * buffer and JPEG-encoded into {@code outputFDir + i + ".png"}.
 */
public class PDFThread implements Runnable {
    /** Page to render. */
    PDFPage page;
    /** Page number, used as the base name of the output file. */
    int i;
    /** Output location prefix; concatenated as-is with the file name. */
    String outputFDir;

    /**
     * Creates a render task for a single page.
     *
     * @param page       page to render
     * @param i          page number used to name the output file
     * @param outputFDir prefix prepended verbatim to the file name, so it should
     *                   end with a path separator
     */
    public PDFThread(PDFPage page, int i, String outputFDir) {
        this.page = page;
        this.i = i;
        this.outputFDir = outputFDir;
    }

    /**
     * Renders the page and writes it to disk. File and encoding failures are
     * printed to standard error instead of being propagated.
     */
    @Override
    public void run() {
        Rectangle rect = new Rectangle(0, 0, (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
        Image img = page.getImage(rect.width, rect.height, // width & height
                rect, // clip rect
                null, // null for the ImageObserver
                true, // fill background with white
                true // block until drawing is done
                );
        BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);

        Graphics2D g = tag.createGraphics();
        try {
            //g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.drawImage(img, 0, 0, rect.width, rect.height, null);
        } finally {
            // Release the graphics context as soon as the copy is done.
            g.dispose();
        }

        // NOTE(review): the file is named ".png" but the content written below is
        // JPEG-encoded; the name is kept so existing output paths do not change.
        FileOutputStream out;
        try {
            out = new FileOutputStream(outputFDir + i + ".png");
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            return;
        }
        try {
            JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
            JPEGEncodeParam param2 = encoder.getDefaultJPEGEncodeParam(tag);
            param2.setQuality(1f, false); // 1f: raise the quality of the generated image
            encoder.setJPEGEncodeParam(param2);
            encoder.encode(tag); // JPEG encoding
        } catch (ImageFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the stream, even if encoding fails unexpectedly.
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}


主代码:

package com.zas.pdfrender.test;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;

/**
 * Converts every page of a PDF into an image file, starting one thread per page.
 */
public class PDFRenderTest {

    /**
     * Loads the PDF and starts one {@link PDFThread} per page.
     *
     * <p>This method returns as soon as all workers have been started; it does
     * not wait for them to finish.
     *
     * @param inputPDFPath path of the PDF to convert
     * @param outputFDir   output directory prefix; created if missing
     * @throws FileNotFoundException if the input PDF does not exist
     * @throws IOException           if the PDF cannot be read
     */
    public static void convert(String inputPDFPath, String outputFDir) throws IOException, FileNotFoundException {
        // Verify the PDF exists and create the output folder.
        File file = new File(inputPDFPath);
        if (!file.exists()) {
            throw new FileNotFoundException("文件不存在: " + inputPDFPath);
        }
        File outputFolder = new File(outputFDir);
        if (!outputFolder.exists()) {
            outputFolder.mkdirs();
        }

        // Map the file into memory; the mapping stays valid after the channel is
        // closed, so the file handle is released right away (even on failure).
        ByteBuffer buf;
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        try {
            FileChannel channel = raf.getChannel();
            buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        } finally {
            raf.close(); // also closes the associated channel
        }
        PDFFile pdffile = new PDFFile(buf);

        int pageCount = pdffile.getNumPages();
        System.out.println("PDF页数: " + pageCount + " , " + inputPDFPath);

        // Conversion: PDF pages are numbered from 1.
        for (int i = 1; i <= pageCount; i++) {
            PDFPage page = pdffile.getPage(i);
            new Thread(new PDFThread(page, i, outputFDir)).start();
        }
    }

    /**
     * Runs the conversion on a sample document and prints the elapsed seconds.
     *
     * <p>Because {@link #convert} returns right after starting the workers, the
     * printed time covers only loading the PDF and starting the threads, not the
     * rendering itself.
     */
    public static void main(final String[] args) throws FileNotFoundException, IOException {
        String inputPDFPath = "D:\\pdf\\ppt\\2010110东南大学档案管理系统需求分析说明书正式.pdf";
        String outputFDir = "D:\\pdf\\222222222222010110系统需求分析说明书正式\\";
        long beginTime = System.nanoTime();
        PDFRenderTest.convert(inputPDFPath, outputFDir);
        long endTime = System.nanoTime();

        System.out.println("耗时: " + (endTime - beginTime) / 1000000000 + " 秒" );
    }
}


代码问题有两个:

1. 每页PDF都起一个线程,线程数过多,肯定浪费资源

2. 计时程序出了问题:convert 方法只负责启动线程,不等各线程执行完就返回了,所以主线程统计到的只是启动线程的耗时,而不是实际转换的耗时

先解决计时问题

原始的想法有两个:
1. 使用一个计数器,其大小等于线程数,每一线程执行完后就减一,当其为0时执行最后的计时
2. 使用一个布尔数组,其大小等于线程数,每一线程执行完后就置对应的布尔值为已完成,主程序轮询数组,当其全部为已完成时执行最后计时

但是这两种想法怎么看怎么不高端大气上档次,继续查资料,找到两个JDK自带的类 CyclicBarrier与CountDownLatch。这两个类都能实现多线程计时,而CountDownLatch好像更符合我的要求,就采用它了。

改代码

PDF线程修改

package com.zas.pdfrender.test;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.concurrent.CountDownLatch;

import com.sun.image.codec.jpeg.ImageFormatException;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGEncodeParam;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
import com.sun.pdfview.PDFPage;

/**
 * Renders one PDF page to an image file and signals a latch when finished.
 *
 * <p>The page is rasterized at the size of its bounding box, copied into an RGB
 * buffer and JPEG-encoded into {@code outputFDir + i + ".png"}. The latch is
 * counted down exactly once per run, whether or not rendering succeeded, so a
 * caller blocked in {@code latch.await()} cannot be left waiting by a failed page.
 */
public class PDFThread implements Runnable {
    /** Page to render. */
    PDFPage page;
    /** Page number, used as the base name of the output file. */
    int i;
    /** Output location prefix; concatenated as-is with the file name. */
    String outputFDir;
    /** Latch counted down once when this task ends. */
    CountDownLatch latch;

    /**
     * Creates a render task for a single page.
     *
     * @param page       page to render
     * @param i          page number used to name the output file
     * @param outputFDir prefix prepended verbatim to the file name, so it should
     *                   end with a path separator
     * @param latch      latch to count down when the task ends
     */
    public PDFThread(PDFPage page, int i, String outputFDir, CountDownLatch latch) {
        this.page = page;
        this.i = i;
        this.outputFDir = outputFDir;
        this.latch = latch;
    }

    /** Renders the page, then counts the latch down even if rendering failed. */
    @Override
    public void run() {
        try {
            renderPage();
        } finally {
            // Must run on every exit path, including the early return on a
            // missing output file and any unchecked exception.
            latch.countDown();
        }
    }

    /**
     * Rasterizes the page and writes it to disk. File and encoding failures are
     * printed to standard error instead of being propagated.
     */
    private void renderPage() {
        Rectangle rect = new Rectangle(0, 0, (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
        Image img = page.getImage(rect.width, rect.height, // width & height
                rect, // clip rect
                null, // null for the ImageObserver
                true, // fill background with white
                true // block until drawing is done
                );
        BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);

        Graphics2D g = tag.createGraphics();
        try {
            //g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.drawImage(img, 0, 0, rect.width, rect.height, null);
        } finally {
            // Release the graphics context as soon as the copy is done.
            g.dispose();
        }

        // NOTE(review): the file is named ".png" but the content written below is
        // JPEG-encoded; the name is kept so existing output paths do not change.
        FileOutputStream out;
        try {
            out = new FileOutputStream(outputFDir + i + ".png");
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            return;
        }
        try {
            JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
            JPEGEncodeParam param2 = encoder.getDefaultJPEGEncodeParam(tag);
            param2.setQuality(1f, false); // 1f: raise the quality of the generated image
            encoder.setJPEGEncodeParam(param2);
            encoder.encode(tag); // JPEG encoding
        } catch (ImageFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the stream, even if encoding fails unexpectedly.
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

主程序修改

package com.zas.pdfrender.test;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.concurrent.CountDownLatch;

import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;

/**
 * Converts every page of a PDF into an image file, one thread per page, and
 * reports the total elapsed time once all pages are done.
 */
public class PDFRenderTest {

    /**
     * Loads the PDF, starts one {@link PDFThread} per page, waits for all of
     * them via a {@link CountDownLatch}, and prints the elapsed whole seconds.
     *
     * <p>If the waiting thread is interrupted, the stack trace is printed, the
     * interrupt flag is restored and no timing line is printed.
     *
     * @param inputPDFPath path of the PDF to convert
     * @param outputFDir   output directory prefix; created if missing
     * @throws FileNotFoundException if the input PDF does not exist
     * @throws IOException           if the PDF cannot be read
     */
    public static void convert(String inputPDFPath, String outputFDir) throws IOException, FileNotFoundException {
        long beginTime = System.nanoTime();
        // Verify the PDF exists and create the output folder.
        File file = new File(inputPDFPath);
        if (!file.exists()) {
            throw new FileNotFoundException("文件不存在: " + inputPDFPath);
        }
        File outputFolder = new File(outputFDir);
        if (!outputFolder.exists()) {
            outputFolder.mkdirs();
        }

        // Map the file into memory; the mapping stays valid after the channel is
        // closed, so the file handle is released right away (even on failure).
        ByteBuffer buf;
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        try {
            FileChannel channel = raf.getChannel();
            buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        } finally {
            raf.close(); // also closes the associated channel
        }
        PDFFile pdffile = new PDFFile(buf);

        int pageCount = pdffile.getNumPages();
        System.out.println("PDF页数: " + pageCount + " , " + inputPDFPath);
        // One count per page: each worker counts down once when it ends.
        CountDownLatch latch = new CountDownLatch(pageCount);

        // Conversion: PDF pages are numbered from 1.
        for (int i = 1; i <= pageCount; i++) {
            PDFPage page = pdffile.getPage(i);
            new Thread(new PDFThread(page, i, outputFDir, latch)).start();
        }
        try {
            latch.await();
            long endTime = System.nanoTime();
            System.out.println("耗时: " + (endTime - beginTime) / 1000000000 + " 秒" );
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /** Runs the conversion on a sample document. */
    public static void main(final String[] args) throws FileNotFoundException, IOException {
        String inputPDFPath = "D:\\pdf\\ppt\\2010110东南大学档案管理系统需求分析说明书正式.pdf";
        String outputFDir = "D:\\pdf\\222222222222010110系统需求分析说明书正式\\";
        PDFRenderTest.convert(inputPDFPath, outputFDir);
    }
}


测试结果:
对于一个79页的PDF,不开线程用时8秒,开79个线程用时5秒
对于一个634页的PDF,不开线程用时459秒,开不了634个线程,改进线程程序

PDFThread:

package com.zas.pdfrender.test;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import com.sun.image.codec.jpeg.ImageFormatException;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGEncodeParam;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
import com.sun.pdfview.PDFPage;

/**
 * Renders a batch of PDF pages to image files and signals a latch when the
 * batch ends.
 *
 * <p>Each page is rasterized at the size of its bounding box, copied into an
 * RGB buffer and JPEG-encoded into {@code outputFDir + pageNumber + ".png"}.
 * The latch is counted down exactly once per run, even if a page fails with an
 * unchecked exception, so a caller blocked in {@code latch.await()} cannot be
 * left waiting.
 */
public class PDFThread implements Runnable {
    /** Pages to render, keyed by page number. */
    Map<Integer, PDFPage> map;
    /** Output location prefix; concatenated as-is with the file name. */
    String outputFDir;
    /** Latch counted down once when this batch ends. */
    CountDownLatch latch;

    /**
     * Creates a render task for a batch of pages.
     *
     * @param map        pages to render, keyed by the page number used to name
     *                   each output file
     * @param outputFDir prefix prepended verbatim to the file name, so it should
     *                   end with a path separator
     * @param latch      latch to count down when the batch ends
     */
    public PDFThread(Map<Integer, PDFPage> map, String outputFDir, CountDownLatch latch) {
        this.map = map;
        this.outputFDir = outputFDir;
        this.latch = latch;
    }

    /** Renders every page in the batch in map iteration order, then counts the latch down. */
    @Override
    public void run() {
        try {
            for (Map.Entry<Integer, PDFPage> entry : map.entrySet()) {
                this.convert(entry.getValue(), entry.getKey());
            }
        } finally {
            // Must run even if a page throws, otherwise the waiting thread hangs.
            latch.countDown();
        }
    }

    /**
     * Rasterizes one page and writes it to disk. File and encoding failures are
     * printed to standard error instead of being propagated.
     *
     * @param page page to render
     * @param i    page number used to name the output file
     */
    private void convert(PDFPage page, Integer i) {
        Rectangle rect = new Rectangle(0, 0, (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
        Image img = page.getImage(rect.width, rect.height, // width & height
                rect, // clip rect
                null, // null for the ImageObserver
                true, // fill background with white
                true // block until drawing is done
                );
        BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);

        Graphics2D g = tag.createGraphics();
        try {
            //g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.drawImage(img, 0, 0, rect.width, rect.height, null);
        } finally {
            // Release the graphics context as soon as the copy is done.
            g.dispose();
        }

        // NOTE(review): the file is named ".png" but the content written below is
        // JPEG-encoded; the name is kept so existing output paths do not change.
        FileOutputStream out;
        try {
            out = new FileOutputStream(outputFDir + i + ".png");
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            return;
        }
        try {
            JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
            JPEGEncodeParam param2 = encoder.getDefaultJPEGEncodeParam(tag);
            param2.setQuality(1f, false); // 1f: raise the quality of the generated image
            encoder.setJPEGEncodeParam(param2);
            encoder.encode(tag); // JPEG encoding
        } catch (ImageFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the stream, even if encoding fails unexpectedly.
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}


主程序:

package com.zas.pdfrender.test;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;

/**
 * Converts every page of a PDF into an image file, handing fixed-size batches
 * of pages to worker threads, and reports the total elapsed time.
 */
public class PDFRenderTest {

    /** Number of pages handed to each worker thread. */
    private static final int PAGES_PER_THREAD = 8;

    /**
     * Loads the PDF, splits its pages into batches of {@code PAGES_PER_THREAD},
     * starts one {@link PDFThread} per batch, waits for all of them via a
     * {@link CountDownLatch}, and prints the elapsed whole seconds.
     *
     * <p>If the waiting thread is interrupted, the stack trace is printed, the
     * interrupt flag is restored and no timing line is printed.
     *
     * @param inputPDFPath path of the PDF to convert
     * @param outputFDir   output directory prefix; created if missing
     * @throws FileNotFoundException if the input PDF does not exist
     * @throws IOException           if the PDF cannot be read
     */
    public static void convert(String inputPDFPath, String outputFDir) throws IOException, FileNotFoundException {
        long beginTime = System.nanoTime();
        // Verify the PDF exists and create the output folder.
        File file = new File(inputPDFPath);
        if (!file.exists()) {
            throw new FileNotFoundException("文件不存在: " + inputPDFPath);
        }
        File outputFolder = new File(outputFDir);
        if (!outputFolder.exists()) {
            outputFolder.mkdirs();
        }

        // Map the file into memory; the mapping stays valid after the channel is
        // closed, so the file handle is released right away (even on failure).
        ByteBuffer buf;
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        try {
            FileChannel channel = raf.getChannel();
            buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        } finally {
            raf.close(); // also closes the associated channel
        }
        PDFFile pdffile = new PDFFile(buf);

        int pageCount = pdffile.getNumPages();
        System.out.println("PDF页数: " + pageCount + " , " + inputPDFPath);
        // Ceiling division: a trailing partial batch still needs its own worker.
        int threadNumber = (pageCount + PAGES_PER_THREAD - 1) / PAGES_PER_THREAD;
        CountDownLatch latch = new CountDownLatch(threadNumber);

        // Conversion: collect pages (numbered from 1) and start a worker each
        // time a batch is full.
        int threadCount = 0;
        Map<Integer, PDFPage> map = new LinkedHashMap<Integer, PDFPage>();
        for (int i = 1; i <= pageCount; i++) {
            map.put(i, pdffile.getPage(i));
            if (i % PAGES_PER_THREAD == 0) {
                startWorker(map, outputFDir, latch);
                threadCount++;
                // The started worker owns the old map; begin a fresh batch.
                map = new LinkedHashMap<Integer, PDFPage>();
            }
        }
        System.out.println("threadCount = " + threadCount);
        // Pages left over after the last full batch.
        if (map.size() > 0) {
            startWorker(map, outputFDir, latch);
            threadCount++;
        }
        System.out.println("threadCount = " + threadCount + " : map size = " + map.size());
        try {
            latch.await();
            long endTime = System.nanoTime();
            System.out.println("耗时: " + (endTime - beginTime) / 1000000000 + " 秒" );
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /** Starts a new thread that renders the given batch of pages. */
    private static void startWorker(Map<Integer, PDFPage> batch, String outputFDir, CountDownLatch latch) {
        new Thread(new PDFThread(batch, outputFDir, latch)).start();
    }

    /** Runs the conversion on a sample document. */
    public static void main(final String[] args) throws FileNotFoundException, IOException {
        // Alternative sample inputs:
        // String inputPDFPath = "D:\\pdf\\2010110东南大学档案管理系统需求分析说明书正式.pdf";
        String inputPDFPath = "D:\\pdf\\面向对象软件构造(第二版)中英对照版.pdf";
        // String inputPDFPath = "D:\\pdf\\Linux命令行技术大全.pdf";
        String outputFDir = "D:\\pdf\\222222222222010110系统需求分析说明书正式\\";
        PDFRenderTest.convert(inputPDFPath, outputFDir);
    }
}


测试结果:

对于544页的PDF,开不开线程对于时间影响不大,应该在图像处理部分阻塞掉了
后续再继续定位处理
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值