PDF 与 Word 文档互转（Java 实现）

最新推荐文章于 2024-06-16 17:09:45 发布

qxy_1218

最新推荐文章于 2024-06-16 17:09:45 发布

阅读量116

点赞数

文章标签： pdf word java

本文链接：https://blog.csdn.net/qxy_1218/article/details/132670561

版权

引入依赖

<!--   文档转换pdf转word     -->
<dependency>
    <groupId>com.github.lafa.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>1.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<!--处理word文档需要的额外的jar包-->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<!--处理word文档需要的额外的jar包-->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>


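<!--word转pdf（下文 pdfword2 的 pdf转word 也使用该依赖）；documents4j 通过调用本机安装的 Microsoft Word 完成转换，需在已安装 Word 的 Windows 环境下运行-->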
<dependency>
    <groupId>com.documents4j</groupId>
    <artifactId>documents4j-local</artifactId>
    <version>1.0.3</version>
</dependency>
<dependency>
    <groupId>com.documents4j</groupId>
    <artifactId>documents4j-transformer-msoffice-word</artifactId>
    <version>1.0.3</version>
</dependency>

具体代码：

package com.example.wpsutil;

import com.documents4j.api.DocumentType;
import com.documents4j.api.IConverter;
import com.documents4j.job.LocalConverter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;

import java.io.*;

/**
 * Demo utility for converting between PDF and Word (.docx) files.
 *
 * <p>Two approaches are shown:
 * <ul>
 *   <li>{@link #pdfword()} extracts plain text with PDFBox and writes it into a new
 *       .docx with Apache POI (text only).</li>
 *   <li>{@link #word2pdf()} and {@link #pdfword2()} delegate the conversion to
 *       documents4j's {@code LocalConverter}.</li>
 * </ul>
 *
 * <p>All input and output paths are hard-coded in the individual methods.
 */
public class WpsUtil {

    /**
     * Entry point: runs the documents4j based PDF to Word conversion.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            pdfword2();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * PDF to Word, text only: reads {@code D:\1.pdf}, extracts its text and writes it as a
     * single run of a single paragraph into {@code D:\sql1.docx}.
     *
     * <p>Any failure is caught and its stack trace printed; nothing is thrown to the caller.
     */
    public static void pdfword() {
        try {
            writePdfTextToDocx("D:\\1.pdf", "D:\\sql1.docx");
            // Printed only after every resource was written and closed successfully.
            System.out.println("PDF转Word成功！");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Word to PDF via documents4j: converts {@code D:\sql1.docx} into {@code d:\2.pdf}.
     *
     * <p>Reference: https://blog.csdn.net/ka3p06/article/details/125476270
     *
     * <p>Failures are reported on standard output / standard error and are not rethrown.
     *
     * @throws IOException never thrown by the current implementation; kept in the signature
     *                     so existing callers that catch it keep compiling
     */
    public static void word2pdf() throws IOException {
        convert("D:\\sql1.docx", DocumentType.DOCX,
                "d:\\2.pdf", DocumentType.PDF,
                "[documents4J] word转pdf失败:");
    }

    /**
     * PDF to Word via documents4j: converts {@code D:\1.pdf} into {@code d:\2.docx}.
     *
     * <p>The original author's note says this route is "lossless" but only converts
     * 4 pages; that limitation is not verified here.
     *
     * <p>Reference: https://blog.csdn.net/ka3p06/article/details/125476270
     *
     * <p>Failures are reported on standard output / standard error and are not rethrown.
     *
     * @throws IOException never thrown by the current implementation; kept in the signature
     *                     so existing callers that catch it keep compiling
     */
    public static void pdfword2() throws IOException {
        convert("D:\\1.pdf", DocumentType.PDF,
                "d:\\2.docx", DocumentType.DOCX,
                "[documents4J] pdf转word失败:");
    }

    /**
     * Extracts the text of a PDF and stores it in a new .docx file.
     *
     * @param pdfPath  path of the PDF to read
     * @param docxPath path of the .docx to create
     * @throws IOException if reading the PDF or writing the .docx fails
     */
    private static void writePdfTextToDocx(String pdfPath, String docxPath) throws IOException {
        // try-with-resources closes everything in reverse order, also when an exception is thrown.
        try (InputStream pdfInput = new FileInputStream(pdfPath);
             PDDocument pdf = PDDocument.load(pdfInput);
             XWPFDocument word = new XWPFDocument()) {

            String text = new PDFTextStripper().getText(pdf);

            XWPFParagraph paragraph = word.createParagraph();
            XWPFRun run = paragraph.createRun();
            run.setText(text);

            // Open the target only once the text was extracted, so a failed
            // extraction does not leave an empty output file behind.
            try (OutputStream docxOutput = new FileOutputStream(docxPath)) {
                word.write(docxOutput);
            }
        }
    }

    /**
     * Converts one file into another format with a documents4j {@code LocalConverter}.
     *
     * <p>The converter is always shut down and both streams are always closed, whether or
     * not the conversion succeeds. Errors are printed, prefixed with {@code failurePrefix}.
     *
     * @param sourcePath    path of the file to convert
     * @param sourceType    document type of the source file
     * @param targetPath    path of the file to create
     * @param targetType    document type to convert to
     * @param failurePrefix text printed in front of the failure description
     */
    private static void convert(String sourcePath, DocumentType sourceType,
                                String targetPath, DocumentType targetType,
                                String failurePrefix) {
        try (InputStream source = new FileInputStream(sourcePath);
             OutputStream target = new FileOutputStream(targetPath)) {

            IConverter converter = LocalConverter.builder().build();
            try {
                boolean converted = converter.convert(source)
                        .as(sourceType)
                        .to(target)
                        .as(targetType)
                        .execute();
                if (!converted) {
                    System.out.println(failurePrefix + "conversion returned false");
                }
            } finally {
                // Release the converter even when the conversion throws.
                converter.shutDown();
            }
        } catch (Exception e) {
            System.out.println(failurePrefix + e.toString());
            // Keep the full cause visible, consistent with pdfword().
            e.printStackTrace();
        }
    }
}