首先把Word文档转为pdf,方法网上很多,比如用jacob、poi、pdfbox、xpdf、OpenOffice+JodConverter(Openoffice)等。
现在主要讲pdf转图片方法:
1、 Icepdf(http://www.icepdf.org/)
功能强大,支持pdf转图片、直接搜索pdf内容、提取文本等。
示例如下,所需的jar包可在官方网站下载:
package com.test;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;
/**
 * Renders every page of a PDF to a PNG file using ICEpdf.
 *
 * <p>Reads {@code 333.pdf} from the current working directory and writes one
 * {@code imageCapture1_<pageIndex>.png} per page (relative path, 0-based index).
 */
public class TestPdfToGif3 {
    /** Scale argument passed to {@code Document.getPageImage}. */
    private static final float SCALE = 1.3f;
    /** Rotation argument passed to {@code Document.getPageImage}. */
    private static final float ROTATION = 0f;

    /**
     * Entry point: opens the PDF, captures each page to a PNG, then releases the document.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = System.getProperty("user.dir") + "/" + "333.pdf";
        Document document = new Document();
        try {
            document.setFile(filePath);
        } catch (Exception ex) {
            // Nothing can be rendered from a document that failed to open:
            // report the cause and stop instead of silently continuing.
            System.err.println("Unable to open PDF file: " + filePath);
            ex.printStackTrace();
            return;
        }
        try {
            // Paint each page's content to an image and write the image to a file.
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                BufferedImage image = (BufferedImage) document.getPageImage(
                        i, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, ROTATION, SCALE);
                try {
                    System.out.println("\t capturing page " + i);
                    File file = new File("imageCapture1_" + i + ".png");
                    ImageIO.write(image, "png", file);
                } catch (IOException e) {
                    // A failed write affects only this page; keep going with the rest.
                    e.printStackTrace();
                } finally {
                    image.flush();
                }
            }
        } finally {
            // Clean up resources even when rendering throws unexpectedly.
            document.dispose();
        }
    }
}
2、 pdf-renderer(https://pdf-renderer.dev.java.net/)
转图片的示例如下,所需的jar包可在官方网站下载:
package com.test;
import java.awt.Color;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import javax.imageio.ImageIO;
import org.apache.log4j.Logger;
import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;
/**
 * Test of PDF-to-image conversion with pdf-renderer
 * (https://pdf-renderer.dev.java.net/).
 *
 * <p>Each page of {@code <basePath><pdfName>.pdf} is rendered and written to
 * {@code <basePath><pdfName>.<pageIndex>.<format>}; files that already exist are skipped.
 */
public class TestPdfToGif2 {
    private static final Logger log = Logger.getLogger(TestPdfToGif2.class);

    /** PDF file name without the {@code .pdf} extension. */
    private String pdfName;
    /** Directory prefix that is concatenated directly in front of {@link #pdfName}. */
    private String basePath;
    /** Full path of the PDF without its extension; also the prefix of every output file. */
    private String fileNameNoExtMD5;

    public String getPdfName() {
        return pdfName;
    }

    public void setPdfName(String pdfName) {
        this.pdfName = pdfName;
    }

    public String getBasePath() {
        return basePath;
    }

    public void setBasePath(String basePath) {
        this.basePath = basePath;
    }

    /**
     * Test entry point: converts {@code 333.pdf} in the working directory to GIF files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            TestPdfToGif2 pg = new TestPdfToGif2();
            pg.setPdfName("333");
            pg.setBasePath(System.getProperty("user.dir") + "/");
            pg.fileNameNoExtMD5 = pg.basePath + pg.getPdfName();
            pg.pdfToGIF("gif");
        } catch (Exception e) {
            log.error("PDF conversion failed", e);
        }
        log.info("程序运行完了!");
    }

    /**
     * Creates the PDF document reader.
     *
     * @param filePath path of the PDF file
     * @return a {@code PDFFile} instance, or {@code null} when the file cannot be read or parsed
     */
    private static PDFFile getPdfFile(String filePath) {
        RandomAccessFile raf = null;
        try {
            raf = new RandomAccessFile(new File(filePath), "r");
            FileChannel channel = raf.getChannel();
            ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            return new PDFFile(buf);
        } catch (Exception ex) {
            log.error("Cannot read PDF file " + filePath, ex);
            return null;
        } finally {
            // A mapping stays valid after its channel is closed, so the file handle
            // can be released here without invalidating the buffer.
            if (raf != null) {
                try {
                    raf.close();
                } catch (IOException ignored) {
                    // Closing a read-only handle failed; nothing useful can be done.
                }
            }
        }
    }

    /**
     * Converts every page of the PDF into an image file.
     *
     * <p>A page whose output file already exists is skipped. A page that fails is
     * logged and does not stop the remaining pages.
     *
     * @param suff ImageIO format name (for example {@code "gif"}); also used as the
     *             output file extension
     * @throws IOException if the PDF file cannot be opened
     */
    private void pdfToGIF(String suff) throws IOException {
        String pdfPath = fileNameNoExtMD5 + ".pdf";
        PDFFile pdffile = getPdfFile(pdfPath);
        if (pdffile == null) {
            throw new IOException("Cannot open PDF file: " + pdfPath);
        }
        int pages = pdffile.getNumPages();
        for (int i = 0; i < pages; i++) {
            File outFile = new File(fileNameNoExtMD5 + "." + i + "." + suff);
            if (outFile.exists()) {
                continue;
            }
            try {
                // pdf-renderer numbers pages from 1 (page 0 resolves to the first page),
                // so request i + 1 while keeping the 0-based index in the file name.
                PDFPage page = pdffile.getPage(i + 1);
                Rectangle rect = new Rectangle(0, 0,
                        (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
                // Generate the image.
                Image img = page.getImage(rect.width, rect.height, rect, null, true, true);
                BufferedImage contImage = new BufferedImage(rect.width, rect.height,
                        BufferedImage.TYPE_INT_ARGB);
                contImage.getGraphics().drawImage(img, 0, 0, Color.WHITE, null);
                // The output file is opened only after rendering succeeded, so a render
                // failure cannot leave an empty file that later runs would skip.
                writeImage(contImage, suff, outFile);
            } catch (Exception e) {
                log.error("Failed to convert page index " + i + " of " + pdfPath, e);
            }
        }
    }

    /**
     * Writes an image to a file and removes the file again if nothing valid was written.
     *
     * @param image  image to encode
     * @param format ImageIO format name
     * @param target destination file
     * @throws IOException if writing fails or no ImageIO writer supports {@code format}
     */
    private static void writeImage(BufferedImage image, String format, File target)
            throws IOException {
        FileOutputStream fos = new FileOutputStream(target);
        boolean written = false;
        try {
            written = ImageIO.write(image, format, fos);
            fos.flush();
        } finally {
            try {
                fos.close();
            } finally {
                if (!written) {
                    target.delete();
                }
            }
        }
        if (!written) {
            throw new IOException("No ImageIO writer available for format: " + format);
        }
    }
}
3、 jpedal(http://www.jpedal.org/)
存在一些问题,比如部分文字生成的图片是一块黑的。不过还是附上例子和jar包jpedal_gpl.jar(把文末附件图片的后缀改为jar即可),可能用到的其他包请另外下载:
commons-cli-1.2.jar
commons-io-1.4.jar
FontBox-0.1.0-dev.jar
juh.jar
jurt.jar
PDFBox-0.7.3.jar
poi-3.5-beta6-20090622.jar
poi-contrib-3.5-beta6-20090622.jar
poi-ooxml-3.5-beta6-20090622.jar
poi-scratchpad-3.5-beta6-20090622.jar
ridl.jar
unoil.jar
xmlbeans-2.3.0.jar
xstream-1.3.1.jar
package com.test;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import javax.imageio.ImageIO;
import org.apache.log4j.Logger;
import org.jpedal.PdfDecoder;
/**
 * Test of PDF-to-GIF conversion with JPedal.
 *
 * <p>Each page of {@code <basePath><pdfName>.pdf} is rendered and written to
 * {@code <basePath><pdfName>.<pageIndex>_.gif}; files that already exist are skipped.
 */
public class TestPdfToGif1 {
    private static final Logger log = Logger.getLogger(TestPdfToGif1.class);

    /** PDF file name without the {@code .pdf} extension. */
    private String pdfName;
    /** Directory prefix that is concatenated directly in front of {@link #pdfName}. */
    private String basePath;
    /** Full path of the PDF without its extension; also the prefix of every output file. */
    private String fileNameNoExtMD5;

    public String getPdfName() {
        return pdfName;
    }

    public void setPdfName(String pdfName) {
        this.pdfName = pdfName;
    }

    public String getBasePath() {
        return basePath;
    }

    public void setBasePath(String basePath) {
        this.basePath = basePath;
    }

    /**
     * Test entry point: converts {@code 333.pdf} in the working directory to GIF files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            TestPdfToGif1 pg = new TestPdfToGif1();
            pg.setPdfName("333");
            pg.setBasePath(System.getProperty("user.dir") + "/");
            pg.fileNameNoExtMD5 = pg.basePath + pg.getPdfName();
            pg.pdfToGIF();
        } catch (Exception e) {
            log.error("PDF conversion failed", e);
        }
        log.info("程序运行完了!");
    }

    /**
     * Renders every page of the PDF to a GIF file.
     *
     * <p>A page that fails is logged, its 0-based index is collected, and the
     * remaining pages are still processed. Failures are never thrown to the caller.
     */
    private void pdfToGIF() {
        String pdfPath = fileNameNoExtMD5 + ".pdf";
        StringBuilder errorpages = new StringBuilder();
        PdfDecoder decodePdf = new PdfDecoder(true);
        try {
            decodePdf.openPdfFile(pdfPath);
            int pages = decodePdf.getPageCount();
            log.info("h = " + decodePdf.getPDFHeight());
            log.info("w = " + decodePdf.getPDFWidth());
            for (int i = 0; i < pages; i++) {
                File gifFile = new File(fileNameNoExtMD5 + "." + i + "_.gif");
                if (gifFile.exists()) {
                    continue;
                }
                try {
                    // getPageAsImage is called with a 1-based page number.
                    BufferedImage imageToSave = decodePdf.getPageAsImage(i + 1);
                    int tmpw = imageToSave.getWidth();
                    int tmph = imageToSave.getHeight();
                    log.info("h = " + tmph);
                    log.info("w = " + tmpw);
                    BufferedImage contImage = new BufferedImage(tmpw, tmph,
                            BufferedImage.TYPE_INT_ARGB);
                    contImage.getGraphics().drawImage(imageToSave, 0, 0, Color.WHITE, null);
                    // The output file is opened only after rendering succeeded, so a render
                    // failure cannot leave an empty file that later runs would skip.
                    writeGif(contImage, gifFile);
                } catch (Exception e) {
                    log.error("Failed to convert page index " + i + " of " + pdfPath, e);
                    if (errorpages.length() > 0) {
                        errorpages.append(",");
                    }
                    errorpages.append(i);
                }
            }
            if (errorpages.length() > 0) {
                log.warn("Page indexes that failed to convert: " + errorpages);
            }
        } catch (Exception e) {
            // Log with the throwable so the stack trace is kept, not just the message.
            log.error("Failed to convert " + pdfPath, e);
        } finally {
            decodePdf.closePdfFile();
        }
    }

    /**
     * Writes an image as GIF and removes the file again if nothing valid was written.
     *
     * @param image  image to encode
     * @param target destination file
     * @throws java.io.IOException if writing fails or no GIF writer is available
     */
    private static void writeGif(BufferedImage image, File target) throws java.io.IOException {
        FileOutputStream fos = new FileOutputStream(target);
        boolean written = false;
        try {
            written = ImageIO.write(image, "gif", fos);
            fos.flush();
        } finally {
            try {
                fos.close();
            } finally {
                if (!written) {
                    target.delete();
                }
            }
        }
        if (!written) {
            throw new java.io.IOException("No ImageIO writer available for format: gif");
        }
    }
}
附件: http://hi.csdn.net/attachment/201005/28/0_12750380531542.gif