【工具】PDF 元数据清洗工具说明

PDF 清除文档信息

使用说明

程序会自动扫描当前路径下的 PDF 文档,并判断每个文档的文档信息(作者 Author、创建者 Creator、生成器 Producer)是否需要清除。

  • 清除前
    在这里插入图片描述

  • 清除后
    在这里插入图片描述

自定义配置

  • 新建 config.ini 文件
[global]
NanjingAuthor   = ' '
NanjingProducer = ' '
NanjingCreator  = ' '

查看日志

  • 日志路径:.\log
    在这里插入图片描述

源码

开源地址: https://github.com/ChenDudo/PDFCleaner

在这里插入图片描述

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author  : Chen Do

import ast
import configparser
import datetime
import io
import os
import sys
import time

from PyPDF2 import PdfFileReader
from PyPDF2 import PdfReader
from PyPDF2 import PdfWriter

# Default metadata values written into each processed PDF.
# read_config_file() rebinds them when config.ini provides overrides.
NanjingAuthor = "Firmware Development Group"
NanjingProducer = "MindMotion Nanjing Ecosystem"
NanjingCreator = "Chen Do"

# Bookkeeping updated by handle_doc(): number of rewritten files and the
# paths they were written to.
handle_num = 0
output_file = []


def create_detail_day():
    """Return the current local date and time as ``YYYYmmdd_HHMMSS``."""
    now = datetime.datetime.now()
    return f"{now:%Y%m%d_%H%M%S}"

class Logger(object):
    """File-like object that copies every write to the console and a log file.

    The console stream is whatever ``sys.stdout`` is at construction time.
    The log file is opened in append mode with UTF-8 encoding.

    Args:
        filename: Name of the log file.
        path: Directory in which the log file is created or appended to.

    Raises:
        OSError: If the log file cannot be opened.
    """

    def __init__(self, filename="Default.log", path=".\\"):
        self.terminal = sys.stdout
        self.log = open(os.path.join(path, filename), "a", encoding='utf8')

    def write(self, message):
        """Write *message* to both the console stream and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        Forwarding the flush ensures buffered log text actually reaches the
        file when a caller (or ``print(..., flush=True)``) asks for it.
        """
        self.terminal.flush()
        self.log.flush()

    def close(self):
        """Flush pending output and close the log file (not the console)."""
        if not self.log.closed:
            self.log.flush()
            self.log.close()

def handle_doc(outputfilepath, filepath, filename):
    """Rewrite one PDF so its Author/Creator/Producer match the configured values.

    The file ``filename`` inside ``filepath`` is read, all of its pages are
    copied, and the copy is written under the same name inside
    ``outputfilepath``. The original creation and modification dates are
    carried over when the source document has them.

    Files whose name does not end in ``.pdf`` (any letter case) are reported
    and skipped. So are files whose three metadata fields already equal the
    configured values.

    On success the module-level ``handle_num`` counter is incremented and the
    output path is appended to ``output_file``.

    Args:
        outputfilepath: Directory the rewritten PDF is written to.
        filepath: Directory containing the source PDF.
        filename: File name of the PDF inside ``filepath``.

    Returns:
        None.
    """
    global handle_num

    # Reject non-PDF names first; the comparison is case-insensitive so
    # names such as "REPORT.PDF" are processed as well.
    if not filename.lower().endswith('.pdf'):
        print("[ERROR]\t\""+filename+'\" is not *.Pdf files!')
        return

    # os.path.join picks the right separator instead of a hard-coded '\\'.
    readfilepath = os.path.join(filepath, filename)
    outputfilepath = os.path.join(outputfilepath, filename)

    # PdfReader is the supported replacement for the deprecated
    # PdfFileReader, which raises an error in PyPDF2 3.x.
    reader = PdfReader(readfilepath)
    writer = PdfWriter()

    # metadata is None when the document carries no info dictionary.
    meta = reader.metadata

    already_clean = (
        meta is not None
        and meta.producer == NanjingProducer
        and meta.author == NanjingAuthor
        and meta.creator == NanjingCreator
    )
    if already_clean:
        print("[WARN]\t\""+filename+'\" needn\'t to change!')
        return

    # Copy every page unchanged into the new document.
    for page in reader.pages:
        writer.add_page(page)

    new_metadata = {
        "/Author": NanjingAuthor,
        "/Creator": NanjingCreator,
        "/Producer": NanjingProducer,
    }
    # Preserve the original timestamps only when they exist; writing
    # str(None) would store the literal text "None" as a date.
    if meta is not None:
        creation_raw = meta.creation_date_raw
        if creation_raw is not None:
            new_metadata["/CreationDate"] = str(creation_raw)
        modification_raw = meta.modification_date_raw
        if modification_raw is not None:
            new_metadata["/ModDate"] = str(modification_raw)
    writer.add_metadata(new_metadata)

    # Save the new PDF to a file.
    with open(outputfilepath, "wb") as f:
        writer.write(f)
        print('[INFO]\t'+'\"'+filename+"\" Done! "+'*'*36)

    # Record the successfully processed file.
    handle_num = handle_num + 1
    output_file.append(outputfilepath)


def _parse_config_value(raw):
    """Return the text of one config.ini value without executing it.

    A quoted Python string literal (``'Chen Do'`` or ``"Chen Do"``) is
    unquoted. Anything else, such as an unquoted value or a non-string
    literal, is returned as the raw text so the result is always a ``str``.
    """
    try:
        value = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return raw
    return value if isinstance(value, str) else raw


def read_config_file(filepath):
    """Load metadata overrides from ``config.ini`` inside ``filepath``.

    Expected file layout::

        [global]
        NanjingAuthor   = 'Firmware Development Group'
        NanjingProducer = 'MindMotion Nanjing Ecosystem'
        NanjingCreator  = 'Chen Do'

    Each option present in the ``[global]`` section rebinds the module-level
    variable of the same name; missing options keep their current value.
    A missing file or a missing ``[global]`` section is reported and leaves
    all three values unchanged. The configuration in effect is printed at
    the end in every case.

    Args:
        filepath: Directory that is searched for ``config.ini``.

    Returns:
        None.
    """
    global NanjingAuthor
    global NanjingProducer
    global NanjingCreator

    # os.path.join picks the right separator instead of a hard-coded '\\'.
    conf_file = os.path.join(filepath, "config.ini")
    conf = configparser.ConfigParser()

    if os.path.exists(conf_file):
        conf.read(conf_file)
        if "global" not in conf:
            print("[WARN]\tNo global section in config file")
        else:
            # SECURITY: values were previously passed to eval(), which runs
            # arbitrary code taken from the config file. They are now parsed
            # as literals only (see _parse_config_value).
            section = conf["global"]
            if 'NanjingAuthor' in section:
                NanjingAuthor = _parse_config_value(
                    conf.get("global", "NanjingAuthor"))
            if 'NanjingProducer' in section:
                NanjingProducer = _parse_config_value(
                    conf.get("global", "NanjingProducer"))
            if 'NanjingCreator' in section:
                NanjingCreator = _parse_config_value(
                    conf.get("global", "NanjingCreator"))
    else:
        print("[ERROR]\tNo Config files, Use default config")

    print("[info]\tNow Use following Config :")
    print("\t[Author]: " + NanjingAuthor)
    print("\t[Producer]: " + NanjingProducer)
    print("\t[Creator]: " + NanjingCreator)
  • 下载链接

PDF Cleaner

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值