1. 到 PDFBox 官网下载相关的 jar 包
https://pdfbox.apache.org/download
2.读取文件并进行相应操作
package kang;
import java.awt.print.Book;
import java.awt.print.PageFormat;
import java.awt.print.Paper;
import java.awt.print.PrinterException;
import java.awt.print.PrinterJob;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Iterator;
import javax.print.attribute.HashPrintRequestAttributeSet;
import javax.print.attribute.PrintRequestAttributeSet;
import javax.print.attribute.standard.PageRanges;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.printing.PDFPageable;
import org.apache.pdfbox.printing.PDFPrintable;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Small demo of common Apache PDFBox operations on an existing PDF file:
 * extracting the text into a file, sending the document to a printer in
 * several ways, and stamping an image onto the first page.
 *
 * <p>{@link #main(String[])} runs the text-extraction example; the other
 * examples are private helpers that can be enabled from {@code test1}.
 */
public class PdfReader {

    /**
     * Runs the demo against hard-coded files on the Administrator desktop.
     *
     * @param args ignored
     * @throws IOException declared for callers; failures inside the demo are
     *                     reported on standard error by {@code test1}
     */
    public static void main(String[] args) throws IOException {
        // PDF to be parsed
        File pdfFile = new File("C:\\Users\\Administrator\\Desktop\\java线程池执行原理分析.pdf");
        // Blank PDF; only used by the (commented-out) image-insertion example
        File pdfFileOut = new File("C:\\Users\\Administrator\\Desktop\\Doc1.pdf");
        // Destination of the extracted text
        File pdfFileOutDoc = new File("C:\\Users\\Administrator\\Desktop\\3.doc");
        String imagePath = "C:\\Users\\Administrator\\Desktop\\1.jpg";
        test1(pdfFile, imagePath, pdfFileOut, pdfFileOutDoc);
    }

    /**
     * Writes the text of every page of {@code document} to {@code pdfFileOutDoc},
     * encoded as UTF-8. Only text is written; images are not extracted, and the
     * file content is plain text whatever the file extension is.
     *
     * <p>The document is NOT closed here: it stays usable for further operations
     * and is closed by whoever opened it.
     *
     * @param document      an open PDF document
     * @param pdfFileOutDoc file that receives the extracted text (overwritten)
     * @throws IOException if the text cannot be extracted or the file cannot be written
     */
    private static void printToDoc(PDDocument document, File pdfFileOutDoc) throws IOException {
        PDFTextStripper stripper = new PDFTextStripper();
        // Sort text by its position on the page instead of content-stream order.
        stripper.setSortByPosition(true);
        // Page numbers are 1-based; cover the whole document.
        stripper.setStartPage(1);
        stripper.setEndPage(document.getNumberOfPages());
        // try-with-resources closes the writer and the stream even when writeText fails.
        try (FileOutputStream fos = new FileOutputStream(pdfFileOutDoc);
             Writer writer = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"))) {
            stripper.writeText(document, writer);
        }
    }

    /**
     * Prints the document at its actual size. This is the recommended way to print.
     *
     * <p>The job goes to whatever printer {@link PrinterJob#getPrinterJob()} selects.
     * NOTE(review): the original author observed an XPS file as output, presumably
     * because the default printer was a virtual XPS document writer - confirm.
     *
     * @param document an open PDF document
     * @throws PrinterException if the print job fails
     */
    private static void pdfPrint(PDDocument document) throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));
        job.print();
    }

    /**
     * Prints using custom PrintRequestAttribute values; here only page 1 is printed.
     *
     * @param document an open PDF document
     * @throws PrinterException if the print job fails
     */
    private static void printWithAttributes(PDDocument document)
            throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));
        PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();
        attr.add(new PageRanges(1, 1)); // pages 1 to 1
        job.print(attr);
    }

    /**
     * Prints after showing a print dialog in which the user can choose the
     * page range and other settings; nothing is printed if the dialog is cancelled.
     *
     * @param document an open PDF document
     * @throws PrinterException if the print job fails
     */
    private static void printWithDialog(PDDocument document) throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));
        if (job.printDialog()) {
            job.print();
        }
    }

    /**
     * Prints after showing a print dialog that is pre-populated with custom
     * PrintRequestAttribute values (page 1 only); nothing is printed if the
     * dialog is cancelled.
     *
     * @param document an open PDF document
     * @throws PrinterException if the print job fails
     */
    private static void printWithDialogAndAttributes(PDDocument document)
            throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));
        PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();
        attr.add(new PageRanges(1, 1)); // pages 1 to 1
        if (job.printDialog(attr)) {
            job.print(attr);
        }
    }

    /**
     * Prints using a custom page size and no margins.
     *
     * @param document an open PDF document
     * @throws PrinterException if the print job fails
     */
    private static void printWithPaper(PDDocument document)
            throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));

        // Define the custom paper; units are 1/72 inch.
        Paper paper = new Paper();
        paper.setSize(306, 396);
        paper.setImageableArea(0, 0, paper.getWidth(), paper.getHeight()); // no margins

        // Custom page format based on that paper.
        PageFormat pageFormat = new PageFormat();
        pageFormat.setPaper(paper);

        // A Book overrides the page format; append all pages of the document to it.
        Book book = new Book();
        book.append(new PDFPrintable(document), pageFormat, document.getNumberOfPages());
        job.setPageable(book);
        job.print();
    }

    /**
     * Adds an image to the first page of an existing PDF document and saves
     * the result to another file.
     *
     * @param inputFile  The input PDF to add the image to.
     * @param imagePath  The filename of the image to put in the PDF.
     * @param outputFile The file to write the resulting PDF to.
     * @throws IOException if the PDF or the image cannot be read, or the result cannot be written
     */
    public static void createPDFFromImage(File inputFile, String imagePath, File outputFile)
            throws IOException {
        try (PDDocument doc = PDDocument.load(inputFile)) {
            // The image is added to the first page.
            PDPage page = doc.getPage(0);

            // createFromFile is the easiest way with an image file; if the image is
            // already available as a BufferedImage, LosslessFactory.createFromImage()
            // can be used instead.
            PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);

            // APPEND keeps the existing page content and draws on top of it.
            try (PDPageContentStream contentStream =
                    new PDPageContentStream(doc, page, AppendMode.APPEND, true, true)) {
                // Reduce this value if the image is too large.
                float scale = 1f;
                contentStream.drawImage(pdImage, 20, 20,
                        pdImage.getWidth() * scale, pdImage.getHeight() * scale);
            }
            doc.save(outputFile);
        }
    }

    /**
     * Loads {@code pdfFile} and runs the selected example on it. Any failure is
     * reported on standard error instead of being propagated.
     *
     * @param pdfFile       PDF to load
     * @param imagePath     image used by the image-insertion example
     * @param pdfFileOut    output PDF used by the image-insertion example
     * @param pdfFileOutDoc file that receives the extracted text
     */
    private static void test1(File pdfFile, String imagePath, File pdfFileOut, File pdfFileOutDoc) {
        // Option 1 (more verbose) for loading the document:
        //   InputStream input = new FileInputStream(pdfFile);
        //   PDFParser parser = new PDFParser(new RandomAccessBuffer(input));
        //   parser.parse();
        //   PDDocument document = parser.getPDDocument();
        //
        // Option 2 (much more convenient) is used below. try-with-resources makes
        // sure the document is closed even when one of the operations fails.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            // document.save("C:\\Users\\Administrator\\Desktop\\aa.pdf"); // plain copy of the PDF
            printToDoc(document, pdfFileOutDoc);
            // pdfPrint(document);
            // printWithAttributes(document);
            // printWithDialog(document);
            // printWithDialogAndAttributes(document);
            // printWithPaper(document);
            // createPDFFromImage(pdfFile, imagePath, pdfFileOut);
        } catch (Exception e) {
            // Best-effort demo: report the failure with context and carry on.
            System.err.println("Failed to process " + pdfFile + ": " + e);
        }
    }
}
各种文档就产生了: