如何使用JAVA代码将WORD转成PDF.

如何使用JAVA代码将WORD转成PDF.

  1. 引入jar包

<!-- 转换doc为pdf的组件 -->
        <dependency>
            <groupId>e-iceblue</groupId>
            <artifactId>spire.doc</artifactId>
            <version>4.11.3</version>
        </dependency>

        <!-- 消除水印的组件 -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>jempbox</artifactId>
            <version>1.8.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>xmpbox</artifactId>
            <version>2.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>preflight</artifactId>
            <version>2.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.0</version>
        </dependency>
  2. 代码转换帮助类

package com.tdsms.web.controller.tool;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;

import com.spire.doc.Document;
import com.spire.doc.FileFormat;

import lombok.extern.slf4j.Slf4j;

/**
 * Converts a Word document to PDF with Spire.Doc and then uses PDFBox to strip
 * the evaluation-warning text that Spire.Doc inserts into the generated PDF.
 */
@Slf4j
public class PdfBoxConverter {
    /** Evaluation notice that the Spire.Doc component adds to its output. */
    private static final String SPIRE_WATERMARK_TEXT = "Evaluation Warning: The document was created with Spire.Doc for JAVA.";
    /** Names of the PDF content-stream operators that show text. */
    private static final List<String> TEXT_SHOWING_OPERATORS = Arrays.asList("Tj", "'", "\"", "TJ");

    /**
     * Sample entry point: converts one hard-coded .docx file into a PDF.
     *
     * @param args ignored
     * @throws IOException kept for signature compatibility
     */
    public static void main(String[] args) throws IOException {
        File srcFile = new File("C:\\Users\\Rzxuser\\Downloads\\文档安全.docx");
        File dstFile = new File("D:\\安全文档.pdf");
        convert(srcFile, dstFile);
    }

    /**
     * Converts a Word file to PDF and writes the cleaned result to {@code dstFile}.
     *
     * <p>The PDF produced by Spire.Doc is held in memory, the evaluation text is
     * removed from it, and only then is it saved to disk. Any failure is logged
     * and swallowed; no exception is propagated to the caller.
     *
     * @param docFile the Word document to read
     * @param dstFile the PDF file to write
     */
    public static final void convert(File docFile, File dstFile) {
        Document doc = null;
        try {
            log.info("开始加载doc!");
            doc = new Document();
            doc.loadFromFile(docFile.getCanonicalPath());
            log.info("加载doc完成!");

            // Render the document as PDF into an in-memory buffer.
            log.info("开始转换doc至pdf!");
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            doc.saveToFile(baos, FileFormat.PDF);
            log.info("转换doc至pdf完成!");

            log.info("开始消除pdf无用文本信息,生成新的pdf!");
            try (PDDocument document = PDDocument.load(baos.toByteArray())) {
                // Strip the notice added by the third-party component.
                removeText(document, SPIRE_WATERMARK_TEXT);
                document.save(dstFile.getCanonicalPath());
            }
            log.info("消除pdf无用文本信息,生成新的pdf完成!");
        } catch (Exception e) {
            log.error("转换doc至pdf出错!", e);
        } finally {
            if (doc != null) {
                doc.close();
            }
        }
    }

    /**
     * Removes {@code searchString} from the content streams of every page.
     *
     * <p>Each page is processed by its own {@code PdfContentStreamEditor}, which
     * is given the 1-based page number. On page 1 a special rule applies until
     * the search string has been seen (see the inline comments); on all other
     * pages a text-showing operator is dropped only when the text it produced
     * equals {@code searchString} exactly.
     *
     * @param documentoPDF the PDF document to edit in place
     * @param searchString the text to remove
     * @throws IOException if processing a page fails
     */
    public static final void removeText(PDDocument documentoPDF, String searchString) throws IOException {
        final char[] charArray = searchString.toCharArray();
        // 1-based page counter; page 1 gets special treatment for the notice in its header.
        int pageNumber = 0;

        for (PDPage page : documentoPDF.getPages()) {
            pageNumber++;
            PdfContentStreamEditor editor = new PdfContentStreamEditor(documentoPDF, page, pageNumber) {
                /** Text of the glyphs shown since this buffer was last cleared. */
                final StringBuilder recentChars = new StringBuilder();

                @Override
                protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException {
                    String str = font.toUnicode(code);
                    if (str != null) {
                        recentChars.append(str);
                    }

                    super.showGlyph(textRenderingMatrix, font, code, unicode, displacement);
                }

                // NOTE(review): presumably called by the editor base class once per
                // content-stream operator, after its glyphs were shown - confirm
                // against PdfContentStreamEditor.
                @Override
                protected void write(ContentStreamWriter contentStreamWriter, Operator operator, List<COSBase> operands) throws IOException {
                    String recentText = recentChars.toString();

                    if (this.pageIndex == 1) {
                        // Page 1: text accumulates without being cleared. An operator is
                        // passed through only while the last collected character is not
                        // one of the characters of searchString.
                        boolean endsWithSearchChar = recentText.length() != 0
                                && inCharArray(recentText.charAt(recentText.length() - 1), charArray);
                        if (!endsWithSearchChar) {
                            super.write(contentStreamWriter, operator, operands);
                            return;
                        }
                        if (recentText.indexOf(searchString) != -1) {
                            // Search string fully seen: switch to 2 so that the page-1
                            // rule is skipped for the rest of this page.
                            pageIndex = 2;
                        }
                        // The operator is dropped.
                        return;
                    }

                    // Regular rule: the buffer covers only the current operator.
                    recentChars.setLength(0);
                    if (TEXT_SHOWING_OPERATORS.contains(operator.getName()) && searchString.equals(recentText)) {
                        return;
                    }

                    super.write(contentStreamWriter, operator, operands);
                }
            };
            editor.processPage(page);
        }
    }

    /**
     * Tells whether {@code c} occurs in {@code charArray}.
     *
     * @param c         the character to look for
     * @param charArray the characters to search
     * @return {@code true} if at least one element equals {@code c}
     */
    public static final boolean inCharArray(char c, char[] charArray) {
        for (char candidate : charArray) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值