06.批量修改文件夹下的所有pdf文件名

批量修改文件夹下的所有PDF文件名

按照关键字将pdf文件名修改为文章内的标题。该程序实现的是修改基金官网披露的定报pdf文件的文件名,因为每一个文件的标题都带有"报告"两个字,所以可以使用正则表达式进行匹配与修改。下面提供一个简单版与一个完整版。完整版只适用于基金官网披露的定报pdf,考虑的情况比较多,处理也比较完善。简单版只需修改关键字即可适用于大多数情况,不过会有个别文件修改不正确,属正常情况;特殊情况需要特殊分析,这里只是提供一种修改思路。

  1. 简单版
import pdfplumber
import re
import os

# Folder whose PDFs will be renamed. Same string value as before; the
# backslashes are now escaped explicitly instead of relying on the invalid
# escape sequence "\证".
Project = 'D:\\证监会基金\\'

# Walk the folder tree and rename every PDF after the title found on its
# first page: the extracted text is joined into one line and cut right after
# the first occurrence of "报告".
for folder, _subfolders, file_names in os.walk(Project):
    for file_name in file_names:
        if not file_name.lower().endswith('.pdf'):
            continue
        # Join against the folder os.walk is currently visiting so that files
        # in subfolders resolve to a real path.
        begin = os.path.join(folder, file_name)
        print(begin)
        try:
            with pdfplumber.open(begin) as pdf:
                text = pdf.pages[0].extract_text()   # first page only
        except Exception as exc:
            # Best effort per file: report and move on instead of silently
            # abandoning the rest of the folder.
            print('跳过,无法读取该pdf: {!r}'.format(exc))
            continue
        if not text:
            # No text layer (e.g. a scanned/image PDF): nothing to build a
            # name from.
            print('跳过,第一页没有可提取的文字')
            continue
        mm_0 = re.sub(r'\n', '', text)
        print(mm_0)
        mm_1 = re.sub(r'报告.*', '报告', mm_0)
        # Keep the renamed file next to the original; a bare file name would
        # move it into the current working directory.
        end = os.path.join(folder, mm_1 + '.pdf')
        print(end)
        print('*' * 40)
        # The PDF handle is already closed here (the with-block has exited),
        # so the rename is not blocked by an open file.
        try:
            os.rename(begin, end)
        except OSError as exc:
            print('改名失败: {!r}'.format(exc))
  2. 完整版
"""
    时间:2020-04-10
    功能:批量修改基金定报pdf文件标题
"""
import pdfplumber
from itertools import groupby
import re
import os
from colorama import Fore, Back, Style, init


def routine(str1):
    """Cut a title off after "报告" and remove all whitespace.

    On each line of ``str1`` everything that follows the first "报告" is
    dropped (the "报告" itself is kept); afterwards every whitespace
    character is deleted from the result.

    Args:
        str1: Text to clean up.

    Returns:
        The truncated text without any whitespace.
    """
    truncated = re.sub(r'报告.*', '报告', str1)
    return re.sub(r'\s', '', truncated)


def changlist(list1, str1):
    """Swap the first two items of ``list1`` and append them all to ``str1``.

    The swap is done in place, so the caller's list is modified.

    Args:
        list1: List of strings with at least two items.
        str1: Prefix for the returned string.

    Returns:
        ``str1`` followed by the concatenation of the (swapped) list items.

    Raises:
        IndexError: If ``list1`` has fewer than two items.
    """
    first, second = list1[0], list1[1]
    list1[0] = second
    list1[1] = first
    joined = ''.join(list1)
    return str1 + joined


def have_forlord(list1, str1, str2):
    """Build a title for text that carries a parenthesised fund-type tag.

    Everything in ``str2`` from the first opening parenthesis onwards is
    replaced by ``str1``. The last token of ``list1`` is then split into
    digit / non-digit runs; when it contains both, its first two runs are
    swapped and appended (e.g. "年年度报告2019" becomes "2019年年度报告").
    When it is a single run, the result falls back to ``routine(str2)``.
    Finally, if the result still contains whitespace, its second-to-last
    token gets the same swap treatment, or the tokens are simply joined.

    Args:
        list1: Whitespace-split tokens of the truncated title text; must not
            be empty.
        str1: Tag that replaces the parenthesised part, e.g. ``'(LOF)'``.
        str2: Cleaned first-page text.

    Returns:
        The reconstructed title string.

    Raises:
        IndexError: If ``list1`` is empty.
    """
    # The previous pattern r'(.*' opened a regex group that was never closed,
    # so re.sub raised re.error on every call. Match a literal opening
    # parenthesis instead, ASCII or full-width (assumption: the source PDFs
    # may use either form). A callable replacement inserts the tag verbatim
    # rather than interpreting it as a replacement template.
    str3 = re.sub(r'[(（].*', lambda _match: str1, str2)
    i = list1[-1]   # e.g. "年年度报告2019"
    list2 = [''.join(g) for _, g in groupby(i, key=lambda x: x.isdigit())]
    if len(list2) == 1:
        # The last token has no digit/non-digit boundary: use the generic rule.
        str3 = routine(str2)
    else:
        str3 = changlist(list2, str3)
    list3 = str3.split()
    if len(list3) > 1:
        i = list3[-2]
        list4 = [''.join(g) for _, g in groupby(i, key=lambda x: x.isdigit())]
        if len(list4) > 1:
            str3 = changlist(list4, list3[0])
            str3 = str3 + list3[-1]
        else:
            str3 = ''.join(list3)
    return str3


def _digit_runs(text):
    """Split ``text`` into consecutive runs of digit / non-digit characters."""
    return [''.join(run) for _, run in groupby(text, key=lambda ch: ch.isdigit())]


def _rotate_left(parts):
    """Return a copy of ``parts`` with its first item moved to the end."""
    return parts[1:] + parts[:1]


def _build_title(first_page_text):
    """Derive the new file title from the text of a PDF's first page.

    Raises:
        ValueError: If the text contains a space but fewer than two 4-digit
            numbers, so the reordering rules cannot be applied.
    """
    mm_0 = re.sub(r'\n', '', first_page_text)
    mm_1 = re.sub(r'基金管理人.*', '', mm_0)
    mm_2 = mm_1.strip()  # drop leading/trailing whitespace

    # More than one "度报告", or no space at all: the generic rule suffices.
    if len(re.findall('度报告', mm_2)) > 1 or ' ' not in mm_2:
        return routine(mm_2)

    years = re.findall(r'\d{4}', mm_2)
    if len(years) < 2:
        raise ValueError('fewer than two 4-digit numbers in the title text')
    # Cut the text right after the first occurrence of the second 4-digit number.
    mm_21 = re.sub(years[1] + '.*', years[1], mm_2)
    list1 = mm_21.split()  # split on whitespace

    has_l = 'L' in mm_21 or 'l' in mm_21
    if 'Q' in mm_21 and has_l:
        return have_forlord(list1, '(QDII-LOF)', mm_2)
    if has_l:
        return have_forlord(list1, '(LOF)', mm_2)
    if 'F' in mm_21 or 'f' in mm_21:
        return have_forlord(list1, '(FOF)', mm_2)
    if 'Q' in mm_21 or 'q' in mm_21:
        return have_forlord(list1, '(QDII)', mm_2)

    if len(list1) >= 3:
        list2 = _digit_runs(list1[1])
        list3 = _digit_runs(list1[-1])
        if len(list2) == 1 and len(list3) == 1:
            return routine(mm_2)
        # Move the leading run of each token to its end, then glue all
        # tokens together without spaces.
        list1[1] = ''.join(_rotate_left(list2))
        list1[-1] = ''.join(_rotate_left(list3))
        return ''.join(list1)

    if len(list1) == 2:
        list2 = _digit_runs(list1[1])
        if len(list2) > 1:
            return changlist(list2, list1[0])

    return routine(mm_2)


def main():
    """Rename every PDF directly inside the project folder after its title.

    For each ``.pdf`` file (in sorted order) the first page's text is
    extracted, turned into a title, and the file is renamed to
    ``<project><title>.pdf``. Failures are reported per file and the user is
    asked to press Enter before the run continues with the next file.
    """
    init(autoreset=True)
    # Same string value as before, with the backslashes escaped explicitly.
    project = 'D:\\证监会基金\\'
    num = 0
    # Only the first tuple yielded by os.walk describes the project folder
    # itself; paths are built as project + name, so subfolders are not
    # handled. A missing folder yields no files instead of raising.
    _, _, top_level_files = next(os.walk(project), (project, [], []))
    for name in sorted(top_level_files):
        if not name.lower().endswith('.pdf'):
            continue
        path = project + name
        print(path)
        # date = re.sub(r'\s.*', '', path)
        try:
            with pdfplumber.open(path) as pdf:
                text = pdf.pages[0].extract_text()  # first page only
            if not text:
                # No text layer; routed to the "image format" handler below.
                raise TypeError('no extractable text on the first page')
            title = _build_title(text)
            if not title:
                # An empty title would rename the file to just ".pdf".
                raise ValueError('empty title')
            # end = date + ' ' + mm_1 + '.pdf'
            end = project + title + '.pdf'
            print(end)
            # The PDF handle is closed by now (the with-block has exited).
            os.rename(path, end)
        except TypeError:
            print(Fore.BLACK + Back.RED + "修改失败,该文件为图片格式,请手动修改,按任意键继续执行。")
            print(Fore.CYAN + Style.BRIGHT + '*' * 79)  # bright cyan
            input()
            continue
        except Exception as exc:
            # Exception rather than a bare except, so Ctrl-C still stops the run.
            print(repr(exc))
            print(Fore.BLACK + Back.RED + "该文件改名出现错误,请将该文件发送给李渊,以供优化程序,按任意键继续执行。")
            print(Fore.CYAN + Style.BRIGHT + '*' * 79)  # bright cyan
            input()
            continue
        # Count and announce success only after the rename actually worked.
        num = num + 1
        print(Fore.RED + Style.BRIGHT + '成功修改第{}个文件'.format(num))  # bright red
        print(Fore.CYAN + Style.BRIGHT + '*' * 79)    # bright cyan
    print(Back.CYAN + Fore.BLACK + Style.BRIGHT+"程序执行结束,按任意键退出。")
    input()


# Run the batch rename only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值