写这篇笔记的原因是记录自己踩过的坑,并写出正确的方法,避免下次忘记。
/**
 * Extracts the image XObjects referenced directly by each page of a PDF and
 * writes each one to disk.
 *
 * <p>Files are named {@code path + index + "." + suffix}, where {@code index}
 * starts at 0 and increases by one for every extracted image across the whole
 * document. Form XObjects are skipped, so images nested inside them are not
 * extracted. The document is not closed by this method; the caller owns it.
 *
 * @param document the loaded PDF to scan
 * @param path prefix prepended to every output file name (when it is a
 *             directory it must already end with a separator)
 * @throws IOException if an image cannot be decoded or written
 */
public static void getPdfImages(PDDocument document, String path) throws IOException {
    int index = 0;
    for (PDPage page : document.getPages()) {
        PDResources resources = page.getResources();
        if (resources == null) {
            // A page without a resource dictionary has no XObjects to inspect.
            continue;
        }
        for (COSName xObjectName : resources.getXObjectNames()) {
            PDXObject xObject = resources.getXObject(xObjectName);
            if (!(xObject instanceof PDImageXObject)) {
                // Form XObjects and any other non-image XObjects are ignored.
                continue;
            }
            PDImageXObject image = (PDImageXObject) xObject;
            RenderedImage decoded = image.getImage();
            String suffix = image.getSuffix();
            // getSuffix() may yield null, and ImageIO.write returns false when no
            // writer exists for the requested format; both cases fall back to PNG
            // so that the image is not silently lost.
            boolean written = suffix != null
                    && ImageIO.write(decoded, suffix, new File(path + index + "." + suffix));
            if (!written) {
                ImageIO.write(decoded, "png", new File(path + index + ".png"));
            }
            index++;
        }
    }
}
/**
 * Extracts the text of a PDF and writes it, UTF-8 encoded, to a file with the
 * same path but a {@code .doc} extension.
 *
 * <p>The output contains only the plain text produced by
 * {@code PDFTextStripper}. I/O failures are not propagated: the stack trace is
 * printed and the method returns.
 *
 * @param pdfPath path of the PDF file to convert
 */
public static void pdfToWord(String pdfPath) {
    // try-with-resources closes the document even when extraction fails.
    try (PDDocument doc = PDDocument.load(new File(pdfPath))) {
        // Strip the extension only when the last dot belongs to the file name,
        // not to a directory, and tolerate paths that have no dot at all.
        int separator = Math.max(pdfPath.lastIndexOf('/'), pdfPath.lastIndexOf('\\'));
        int dot = pdfPath.lastIndexOf('.');
        String basePath = dot > separator ? pdfPath.substring(0, dot) : pdfPath;
        String fileName = basePath + ".doc";
        System.out.println(fileName);

        // FileOutputStream creates the file when it does not exist yet.
        try (FileOutputStream fos = new FileOutputStream(fileName);
             Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true); // emit text ordered by position on the page
            stripper.setStartPage(1); // first page to convert
            stripper.setEndPage(doc.getNumberOfPages()); // last page to convert
            stripper.writeText(doc, writer);
        }
        System.out.println("pdf转换word成功!");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
* Rendering resolution passed to the page renderer in pdfToImage.
* A higher DPI gives a sharper image but a slower conversion.
* NOTE(review): at 1600 DPI an A4-sized page would be roughly 13000 x 18700
* pixels - confirm the available heap is sufficient for such bitmaps.
*/
private static final Integer DPI = 1600;
/**
* Image format of the converted pages; used both as the ImageIO format name
* and as the output file extension in pdfToImage.
*/
private static final String IMG_TYPE = "png";
/**
 * Renders every page of a PDF to its own image file.
 *
 * <p>The page with zero-based index {@code i} is rendered at {@code DPI} and
 * written to {@code path + fileName + i + "." + IMG_TYPE}.
 *
 * @param pdfPath the PDF file to render
 * @param path prefix of the output files (when it is a directory it must
 *             already end with a separator)
 * @param fileName base name of the images; the page index is appended to it
 * @throws IOException if the PDF cannot be loaded, or a page cannot be
 *                     rendered or written
 */
public static void pdfToImage(File pdfPath, String path, String fileName) throws IOException {
    // try-with-resources closes the document even when rendering a page fails.
    try (PDDocument doc = PDDocument.load(pdfPath)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        int pageCount = doc.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, DPI);
            ImageIO.write(image, IMG_TYPE, new File(path + fileName + i + "." + IMG_TYPE));
        }
    }
}
/**
 * Demo entry point: extracts the images of one hard-coded PDF and converts
 * another hard-coded PDF to a text-based {@code .doc} file.
 *
 * @param args unused
 * @throws Exception if the first PDF cannot be loaded or its images cannot be written
 */
public static void main(String[] args) throws Exception {
    // Close the document once its images are extracted, even on failure.
    try (PDDocument document = PDDocument.load(new File("F:\\webservice\\webservice教程.pdf"))) {
        getPdfImages(document, "E:\\upload\\");
    }
    pdfToWord("D:/个人陈述写作提纲.pdf");
}
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.23</version>
</dependency>
</dependencies>