快学scala下载[pdf] pdf转doc java提取pdf（forty day）

最新推荐文章于 2023-02-26 18:54:39 发布

高辉

最新推荐文章于 2023-02-26 18:54:39 发布

阅读量617

点赞数

分类专栏：向大数据进军~每天记、Java基础　文章标签：pdf转doc、快学scala下载、java提取pdf

本文链接：https://blog.csdn.net/zjx103rlf/article/details/89853509

版权

向大数据进军~每天记同时被 2 个专栏收录

58 篇文章 0 订阅

订阅专栏

Java基础

21 篇文章 1 订阅

订阅专栏

《快学scala》

链接：https://pan.baidu.com/s/1T12_C-cVwfwwJQtbb6Cvdg
提取码：teaj

【注：下面的代码只适用于纯文字（带文本层）的 PDF。《快学scala》这份 PDF 我没能转换成功，可能是因为它是扫描版，页面内容被当作图片而不是文字；如果有人知道怎么处理，欢迎在评论区指教。】

1、java提取pdf输出

import com.itextpdf.text.pdf.PdfDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

import java.io.FileWriter;
import java.io.*;

/**
 * Small command-line helper that extracts the text of a PDF file with the
 * legacy PDFBox parser and prints it to standard output.
 */
public class PdfReader {
    /** Path used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_PDF_PATH = "F:\\51.pdf";

    /**
     * Parses the PDF at the given path and returns the text produced by
     * {@code PDFTextStripper}.
     *
     * <p>I/O failures (including a missing file) are not propagated: the stack
     * trace is printed and {@code null} is returned.
     *
     * @param pdfFilePath path of the PDF file to read
     * @return the extracted text, or {@code null} if opening, parsing or
     *         text extraction failed with an {@link IOException}
     */
    public static String getTextFromPDF(String pdfFilePath)
    {
        String result = null;
        FileInputStream is = null;
        PDDocument document = null;
        try {
            is = new FileInputStream(pdfFilePath);
            PDFParser parser = new PDFParser(is);
            parser.parse();
            document = parser.getPDDocument();
            PDFTextStripper stripper = new PDFTextStripper();
            result = stripper.getText(document);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers
            // both "file missing" and "read/parse failed".
            e.printStackTrace();
        } finally {
            // Close each resource independently so a failure to close one
            // does not prevent the other from being released.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    /**
     * Prints the text of a PDF to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read.
     *             When absent, the original sample path is used.
     */
    public  static void main(String[] args)
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        String str=PdfReader.getTextFromPDF(path);
        System.out.println(str);

    }

}

2、java pdf转doc

import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

import java.io.*;

public class PdfToDoc {
    /**
     * Creates a PdfToDoc instance. The constructor performs no initialization.
     */
    public PdfToDoc() {
    }

    /**
     * Extracts the text of every page of a PDF and writes it, UTF-8 encoded,
     * to a file with the same base name and a {@code .doc} extension.
     *
     * <p>The output file receives the text emitted by {@code PDFTextStripper}
     * (sorted by position); no other content is written to it.
     *
     * @param name1 path of the source PDF; the output path is derived from it
     *              by replacing the file extension (if any) with {@code .doc}
     * @throws IOException if the PDF cannot be loaded or the text cannot be written
     */
    public static void pdfToDoc(String name1) throws IOException {
        PDDocument doc = PDDocument.load(name1);
        try {
            int pagenumber = doc.getNumberOfPages();

            // Strip the extension only when the last dot belongs to the file
            // name itself: a name without a dot, or a dot that appears only in
            // a parent directory, must leave the path untouched.
            int dotIndex = name1.lastIndexOf('.');
            int separatorIndex = Math.max(name1.lastIndexOf('/'), name1.lastIndexOf('\\'));
            String baseName = dotIndex > separatorIndex ? name1.substring(0, dotIndex) : name1;
            String fileName = baseName + ".doc";

            createFile(fileName);
            FileOutputStream fos = new FileOutputStream(fileName);
            Writer writer = new OutputStreamWriter(fos, "UTF-8");
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setSortByPosition(true);
                stripper.setStartPage(1);
                stripper.setEndPage(pagenumber);
                stripper.writeText(doc, writer);
            } finally {
                // Closing the writer also closes the underlying file stream,
                // even when text extraction fails part-way through.
                writer.close();
            }
        } finally {
            doc.close();
        }
        System.out.println("pdf转换word成功！");
    }

    /**
     * Creates the given directory, including any missing parents, and reports
     * the outcome on standard output.
     *
     * @param destDirName path of the directory to create
     */
    private static void createDir(String destDirName) {
        File dir = new File(destDirName);
        if (dir.exists()) {
            System.out.println("创建目录失败，目标目录已存在！");
            // Nothing to create; returning here avoids a second, contradictory
            // failure message from the mkdirs() call below.
            return;
        }

        // Only affects the path shown in the success message.
        if (!destDirName.endsWith(File.separator)) {
            destDirName = destDirName + File.separator;
        }

        if (dir.mkdirs()) {
            System.out.println("创建目录成功！" + destDirName);
        } else {
            System.out.println("创建目录失败！");
        }

    }

    /**
     * Creates an empty file at the given path, creating missing parent
     * directories first, and reports the outcome on standard output.
     *
     * <p>Nothing is created when the file already exists or when the path ends
     * with the platform file separator. Failures are reported on standard
     * output and never thrown.
     *
     * @param filePath path of the file to create
     */
    public static void createFile(String filePath) {
        File file = new File(filePath);
        if (file.exists()) {
            System.out.println("目标文件已存在" + filePath);
            return;
        }

        if (filePath.endsWith(File.separator)) {
            System.out.println("目标文件不能为目录！");
            return;
        }

        // getParentFile() returns null for a bare file name such as "51.doc",
        // in which case there is no directory to create.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            System.out.println("目标文件所在目录不存在，准备创建它！");
            if (!parent.mkdirs()) {
                // Keep going: createNewFile() below reports the definitive result.
                System.out.println("创建目标文件所在的目录失败！");
            }
        }

        try {
            if (file.createNewFile()) {
                System.out.println("创建文件成功:" + filePath);
            } else {
                System.out.println("创建文件失败！");
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("创建文件失败！" + e.getMessage());
        }

    }

    /**
     * Entry point: converts the PDF named by the first command-line argument,
     * falling back to the original sample path when no argument is given.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to convert
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String a = (args != null && args.length > 0) ? args[0] : "F:/51.pdf";
        pdfToDoc(a);
    }