Java实现读取PDF

package com.lss.common.pdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.pdfbox.TextToPDF;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
import org.apache.pdfbox.util.PDFTextStripper;

public class PDFUtils {
 private final static Logger logger = Logger.getLogger(PDFUtils.class);

 public static void main(String[] args) throws IOException {

  TextToPDF pdfCreator = new TextToPDF();
  StringReader reader = new StringReader("1");
  PDDocument pdfDoc = pdfCreator.createPDFFromText(reader);
  reader.close();
  pdfDoc.close();
  PDPage page=new PDPage();
  pdfDoc.addPage(page);

  // In order for the PDF document to be openable by Adobe Reader, it
  // needs
  // to have some pages in it. So we'll check that.
  PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();
  List allPages = docCatalog.getAllPages();
  System.out.println(allPages.size());
 }

 /**
  * 读取PDF文件
  *
  * @param path
  * @return
  * @throws Exception
  */
 public String readPDFFile(String path) {
  try {
   StringBuffer content = new StringBuffer();
   FileInputStream fis = new FileInputStream(path);
   PDFParser p = new PDFParser(fis);
   p.parse();
   PDFTextStripper ts = new PDFTextStripper();
   content.append(ts.getText(p.getPDDocument()));
   fis.close();
   return content.toString();
  } catch (Exception ex) {
   logger.error(ex);
  }
  return null;
 }

 public String readPDFFile(File file) throws Exception {
  StringBuffer content = new StringBuffer();
  FileInputStream fis = null;
  PDDocument pdfDocument = null;
  StringWriter writer = new StringWriter();
  PDFTextStripper stripper = new PDFTextStripper();
  try {
   fis = new FileInputStream(file);
   pdfDocument = PDDocument.load(fis);
   stripper.writeText(pdfDocument, writer);
   content.append(writer.getBuffer().toString());
  } catch (IOException e) {
   logger.error(e);
  } finally {
   if (writer != null) {
    writer.close();
    writer = null;
   }
   if (fis != null) {
    fis.close();
    file = null;
   }
   if (pdfDocument != null) {
    COSDocument cos = pdfDocument.getDocument();
    cos.close();
    pdfDocument.close();
   }
  }
  return content.toString();
 }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值