Python处理Excel（4）：正则查找Excel复制行

最新推荐文章于 2023-12-29 09:37:37 发布
weixin_46655338
最新推荐文章于 2023-12-29 09:37:37 发布
阅读量369
点赞数
分类专栏： Python Excel 文章标签： python 开发语言
本文链接：https://blog.csdn.net/weixin_46655338/article/details/124808904
版权
Python Excel 专栏收录该内容
4 篇文章 0 订阅
订阅专栏
Python处理Excel（4）：正则查找Excel复制行
#coding=utf-8
#正则查找Excel复制行
import xlrd, xlwt, xlwings, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re
from tkinter import filedialog,ttk
from openpyxl import load_workbook    #pip install openpyxl
#import autopy as at


#file= tkinter.filedialog.askopenfile()#选择打开什么文件，返回IO流对象
#file= tkinter.filedialog.askopenfilename()#选择打开什么文件，返回文件名

#print(file)





from io import StringIO
import sys
from contextlib import contextmanager


import sys





def get_file_list(raw_folder_path):
    """Recursively collect every file found under ``raw_folder_path``.

    Args:
        raw_folder_path: Directory to scan; all sub-directories are included.

    Returns:
        A ``(raw_file_list, raw_file_path_list)`` tuple of two parallel lists:
        the bare file names, and the matching paths built with
        ``os.path.join(directory, file_name)``. Directories are visited in
        sorted order and the files of each directory are sorted by name, so
        the result does not depend on filesystem enumeration order.

    Raises:
        OSError: (e.g. ``FileNotFoundError`` / ``NotADirectoryError``) if
            ``raw_folder_path`` cannot be listed.
    """
    # os.walk() ignores scan errors by default and would quietly yield nothing
    # for a bad path; listing the folder first makes that failure loud.
    os.listdir(raw_folder_path)

    raw_file_list = []
    raw_file_path_list = []
    for home, sub_dirs, files in os.walk(raw_folder_path):
        # Sorting in place controls the order in which os.walk descends.
        sub_dirs.sort()
        for file_name in sorted(files):
            raw_file_list.append(file_name)
            raw_file_path_list.append(os.path.join(home, file_name))
    print('raw_file_list')
    print(raw_file_list)
    return raw_file_list, raw_file_path_list








def _village_name(fil_nam):
    """Derive the administrative-village label from a source file name."""
    # Drop the first 9 characters, trim the characters of "当年科目余额表.xls"
    # from both ends, normalise a few known fragments, then drop 3 more
    # leading characters.
    # NOTE(review): str.strip() removes a *set of characters*, not a literal
    # suffix. Kept unchanged because the exact file-name layout is not visible
    # here -- confirm against real export file names.
    trimmed = fil_nam[9:].strip("当年科目余额表.xls")
    trimmed = trimmed.replace("冲蒌圩", "居委").replace("12月", "").replace("经济联合社", "")
    return trimmed[3:]


def gath_data(raw_file_list, raw_file_path_list, *, month="8",
              workbook_path='202108冲蒌镇两项经费支出情况统计表.xlsx'):
    """Collect one subject's rows from every source file into a summary sheet.

    Prompts on the console for a subject category (1-6), reads ``Sheet1`` of
    every file in ``raw_file_path_list`` as text, keeps the rows whose subject
    name (column 3) matches the category's regular expression and whose month
    (column 1) equals ``month``, prefixes each kept row with the village name
    derived from the file name, orders the rows by a fixed village list and
    writes them into the category's sheet of an existing workbook.

    Args:
        raw_file_list: File names; used only to derive the village name.
        raw_file_path_list: Paths of the Excel files to read, parallel to
            ``raw_file_list``.
        month: Month to keep. Compared as text because the sheets are read
            with ``dtype=str``.
        workbook_path: Existing ``.xlsx`` workbook that receives the result.
            Its other sheets are left untouched.

    Raises:
        ValueError: If the category typed at the prompt is not one of 1-6.
    """
    print('gather data to excel')
    print("file name")
    print('管理费用-其他费用-办公经费')
    print('管理费用-其他费用-党群服务群众经费')
    print('竹洛科目名称是管理费用--党组织群众服务经费，其他名称都是：管理费用-党组织服务群众经费')
    print('专项应付款-基本农田保护补贴资金')

    # Menu choice -> (subject-name pattern, destination sheet).
    categories = {
        '1': (re.compile('管理.*办公经费'), "办公经费数据源"),
        '2': (re.compile('管理.*群众.*经费'), "服务群众经费数据源"),
        '3': (re.compile('.*党员活动经费'), "党员活动经费数据源"),
        '4': (re.compile('.*基本农田保护.*'), "基本农田数据源"),
        # NOTE(review): choice 5 targets the same sheet as choice 4, which
        # looks like a copy-paste slip; kept until the intended sheet name is
        # confirmed against the workbook.
        '5': (re.compile('发包及上交收入.*'), "基本农田数据源"),
        '6': (re.compile('.*三清三拆.*'), "三清三拆数据源"),
    }

    print('input number')
    input_N = input("1办公经费请，2服务群众经费，3党员活动经费，4基本农田保护补贴，5发包及上交收入，6三清三拆，请输入:").strip()
    if input_N not in categories:
        # Fail here with a clear message instead of hitting an unbound
        # pattern variable in the middle of processing.
        raise ValueError(f"invalid choice {input_N!r}; expected one of 1-6")
    find_data, sheet_name = categories[input_N]
    month = str(month)

    list1 = []
    for fil_nam, raw_file_path in zip(raw_file_list, raw_file_path_list):
        getInfo = pandas.read_excel(raw_file_path, sheet_name="Sheet1", dtype=str, keep_default_na=False)
        for row in getInfo.itertuples(index=False, name=None):
            # Going by colu_list below (minus the prepended village column),
            # column 1 is the month and column 3 the subject name.
            result1 = find_data.match(row[3])
            print('查找当年第几月份的两项经费')
            if not (result1 and row[1] == month):
                continue
            row_data_list = list(row)
            print("administrative village行政村")
            print('依次删除文件名的前9个字符，删除当年科目余额表、经联社、会，得到行政村名')
            row_data_list.insert(0, _village_name(fil_nam))
            print(row_data_list)
            list1.append(row_data_list)

    print(list1)
    print('column list')
    colu_list = ['村委会', '年份', '月份', '科目代码', '科目名称', '余额方向', '期初余额', '本月借方', '本月贷方', '期末余额', '本年借方', '本年贷方']
    admin_vill_order = ['前锋', '西海', '八家', '冲洋', '新围', '竹洛', '竹湖', '新屋', '达材', '朝中', '官窦', '伞塘', '白岗', '稔坪', '西坑',
                        '三和', '居委']
    print('target data目标数据')
    targ_data = pandas.DataFrame(list1, columns=colu_list)
    print(targ_data)
    # Use the village column as the row index so rows can be ordered by it.
    targ_data = targ_data.set_index("村委会")
    print(targ_data)

    print('按村委会顺序排列')
    # reindex() yields exactly the listed villages in this order: villages with
    # no matching row become empty rows, unlisted villages are dropped.
    targ_data = targ_data.reindex(admin_vill_order)
    print(targ_data)

    print('不覆盖Excel原来的数据写入Excel')
    # Append mode keeps the workbook's other sheets; 'overlay' writes into the
    # existing target sheet instead of failing or creating a renamed copy.
    # Requires pandas >= 1.4. The context manager saves and closes the file.
    with pandas.ExcelWriter(workbook_path, engine='openpyxl', mode='a',
                            if_sheet_exists='overlay') as writer:
        targ_data.to_excel(writer, header=True, sheet_name=sheet_name)


def func():
    """Ask for a source folder and gather the matching rows from its files.

    Opens a directory-chooser dialog, lists every file below the chosen
    folder with ``get_file_list`` and hands both lists to ``gath_data``.
    Returns without doing anything when the dialog is cancelled.
    """
    print('选择目录，返回目录名')
    raw_folder_path = tkinter.filedialog.askdirectory()
    if not raw_folder_path:
        # askdirectory() gives back an empty value when the dialog is
        # cancelled; scanning that would only fail further down.
        print('no folder selected')
        return

    raw_file_list, raw_file_path_list = get_file_list(raw_folder_path)
    gath_data(raw_file_list, raw_file_path_list)

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    func()






#
#
#
#
#
#
#
#
#
#
# def saveAsNewExcelFile(raw_file_path_list):
#     list1=[]
#     print("file name")
#     fil_nam='村委会1'
#
#     for raw_file_path in raw_file_path_list:
#
#         for i in getInfo.index:
#             for j in range(len(getInfo.loc[i].values)):
#                 if (getInfo.loc[i].values[j] == '专项应付款-村（社区）办公经费'):
#                     start_col = i + 1
#                     row_data = getInfo.loc[i].values
#                     # print(row_data)
#                     row_data = list(row_data)
#                     row_data = list[fil_nam] + row_data
#                     list1.append(row_data)
#         print(list1)
#         print('target data目标数据')
#         targ_data = pandas.DataFrame(list1)
#
#
#
#
#     # get cared macro info from testplan and save as 'MacroInfo.xlsx'
#     getInfo = pandas.read_excel(input_file_path_plan, sheet_name="Sheet1", dtype=str, keep_default_na=False)
#     for i in getInfo.index:
#         for j in range(len(getInfo.loc[i].values)):
#             if (getInfo.loc[i].values[j] == '专项应付款-村（社区）办公经费'):
#                 start_col = i + 1
#                 row_data=getInfo.loc[i].values
#                 #print(row_data)
#                 row_data=list(row_data)
#                 row_data=list[fil_nam]+row_data
#                 list1.append(row_data)
#     print(list1)
#     print('target data目标数据')
#     targ_data=pandas.DataFrame(list1)
#                 #break #add 2.3
#                 # print(getInfo.loc[i].values[j])
#     #x = pandas.DataFrame(getInfo.iloc[start_col:, ])
#     #print(x)
#     # 判断是否存在output文件夹
#     OutputPath=r"e:\贝佳会计系统导出数据\两项经费统计"
#     if (os.path.exists(OutputPath)):
#         shutil.rmtree(OutputPath)
#         print('output dir has been rm -rf and new makedirs')
#     os.makedirs(OutputPath)
#
#     # 写入文件保存在output 文件夹下
#     filepath = os.path.join(OutputPath, '村（社区）办公经费.xls')
#     targ_data.to_excel(filepath, header=2, index=False, encoding='utf-8', sheet_name='村（社区）办公经费')
weixin_46655338
关注
0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Python处理Excel（4）：正则查找Excel复制行

Python处理Excel（4）：正则查找Excel复制行 #coding=utf-8 #正则查找Excel复制行 import xlrd, xlwt, xlwings, pyautogui, time, pyperclip, easygui, os, tkinter, io,pandas,shutil,re from tkinter import filedialog,ttk from openpyxl import load_workbook #pip install openpyxl #impo…
复制链接

扫一扫