对比txt文件路径和excel文件程序

最新推荐文章于 2024-09-15 22:12:08 发布
ETL.
最新推荐文章于 2024-09-15 22:12:08 发布
阅读量59
点赞数
文章标签： excel
本文链接：https://blog.csdn.net/qq_41821216/article/details/134185373
版权
这个程序用于对比88和59上面img和doc的数量。
运行前需要提前通知**导出88上面'品牌'的img和doc文件路径(txt文件)，并导出mongo中该品牌的doc和img(excel文件)。
import os
import pandas as pd

# 检查品牌和文件名之间有中缀文件夹的个数
def file_folder(file):
    """Count the first-column entries that contain more than five '/' characters.

    Args:
        file: A DataFrame whose first column holds path strings.

    Returns:
        The number of entries in the first column with more than five
        forward slashes.
    """
    first_column = file.iloc[:, 0]
    return sum(1 for entry in first_column if entry.count('/') > 5)

def _base_name(file_path):
    """Return the last component of a path that uses forward or backward slashes."""
    return file_path.replace('\\', '/').rsplit('/', 1)[-1]


def check_consistency_imgdoc():
    """Compare the file names listed in the "59" excel export with the "88" path list.

    Reads the module-level ``brands``, ``checks``, ``path`` and ``file_names``.
    For every (brand, check) pair it looks in ``file_names`` for entries whose
    name contains both the brand and the check (skipping ``~$`` lock files):
    a name containing ``.xlsx`` is loaded with ``pd.read_excel`` as the "59"
    data, any other match is loaded with ``pd.read_table`` as the "88" path
    list.  When several files match, the last one of each kind wins.

    The excel sheet must provide the columns ``<check>_name`` and
    ``is_download``; the path list is read from the first column of the
    text file.

    The collected statistics are printed once as a DataFrame after all pairs
    have been processed.

    Raises:
        FileNotFoundError: If either input file is missing for a
            (brand, check) pair.
    """
    result = []
    for brand in brands:
        for check in checks:
            # Reset per pair so a missing export can never silently reuse
            # the data loaded for the previous pair.
            data_xlsx = None
            data_txts = None
            # Blank separator row in front of every (brand, check) section.
            result.append((' ', '', ' '))

            for file_name in file_names:
                if brand in file_name and check in file_name and '~$' not in file_name:
                    file_path = os.path.join(path, file_name)
                    if '.xlsx' in file_name:
                        data_xlsx = pd.read_excel(file_path)   # "59" excel export
                    else:
                        data_txts = pd.read_table(file_path, header=None)   # "88" path list

            if data_xlsx is None or data_txts is None:
                missing = 'excel (.xlsx)' if data_xlsx is None else 'path list'
                raise FileNotFoundError(
                    f'no {missing} file for brand {brand!r} / check {check!r} in {path!r}'
                )

            list_name = check + '_name'
            label = f'{brand} - {check}'

            # --- "59" side -------------------------------------------------
            orig_59 = data_xlsx[list_name]
            len_orig_59 = orig_59.shape[0]
            # Sets give O(1) membership tests in the comparisons below.
            name_59 = set(orig_59)
            len_name_59 = len(name_59)
            down_file_59_reduce = set(data_xlsx[data_xlsx['is_download'] == 1][list_name])
            len_down_file_59_reduce = len(down_file_59_reduce)
            not_downloaded_rows = data_xlsx[data_xlsx['is_download'] == 0]
            isnot_download = set(not_downloaded_rows[list_name])
            isnot_download_number = not_downloaded_rows.shape[0]
            is_notdownload_number = len(isnot_download)
            # An empty name column would otherwise divide by zero.
            ratio = is_notdownload_number / len_name_59 if len_name_59 else 0
            percent = format(ratio, '.2%')

            # --- "88" side -------------------------------------------------
            orig_88 = [_base_name(entry) for entry in data_txts.iloc[:, 0]]
            len_orig_88 = len(orig_88)
            name_88 = set(orig_88)
            len_name_88 = len(name_88)
            file_folders = file_folder(data_txts)

            # --- comparisons -----------------------------------------------
            # x: downloaded on 59 but absent from 88.
            x = len(down_file_59_reduce - name_88)
            # y: present on 88 but unknown to 59.
            y = len(name_88 - name_59)
            # z: not downloaded on 59 yet present on 88.
            z = len(isnot_download & name_88)
            # s: downloaded 59 names found on 88 when letter case is ignored.
            upper_name_88 = {name.upper() for name in name_88}
            s = sum(1 for name in down_file_59_reduce if name.upper() in upper_name_88)
            # Extra matches gained purely by ignoring letter case.
            change_upper_solve_num = s - (len_down_file_59_reduce - x)

            # --- report rows: 59 part --------------------------------------
            result.append((label, f'59-{check}总数', len_orig_59))
            result.append((label, f'59-{check}去重数量', len_name_59))
            result.append((label, f'59-未下载{check}数量(is_download)', isnot_download_number))
            result.append((label, f'59-未下载{check}去重数量(is_download)', is_notdownload_number))
            result.append((label, f'59-未下载{check}百分比', percent))

            # --- report rows: 88 part --------------------------------------
            result.append((label, f'88-{check}总数', len_orig_88))
            result.append((label, f'88-{check}去重数量(不必要)', len_name_88))
            result.append((label, f'88-{check}的文件夹数(需处理)', file_folders))

            # --- report rows: comparison results ---------------------------
            if x:
                result.append((label, f'59中已下载的{check}不在88上的数量', x))
            else:
                result.append((label, f'59已下载{check}全部存在', ''))

            if y:
                result.append((label, f'88中无效{check}数量(修改大小写之后数目能减少)', y))
            else:
                result.append((label, f'88中没有无效{check}', ''))

            if z:
                result.append((label, f'59中未下载{check}在88上的数量', z))
            else:
                result.append((label, f'59中未下载{check}也不在88上', ''))

            if change_upper_solve_num:
                result.append((label, f'59中已下载{check}转换大小写就能在88里找到的总数', s))
                result.append((label, '转换大小写之后就能找到的文件数(新增)', change_upper_solve_num))
                result.append((label, '59中已下载的文件转换大小写还没找到的文件名去重', x - change_upper_solve_num))
                result.append((label, '59中实际缺少的文件去重(已下载未上传+未下载未上传)', x - change_upper_solve_num + z))
            else:
                result.append((label, f'88{check}区分大小写也不管用', ''))

    if not result:
        return

    # Build and print the table once, after every pair has been processed,
    # instead of re-printing the cumulative table for each brand.
    result_end = pd.DataFrame(result, columns=['品牌-分类', '检查内容', '数量'])
    # Drop the leading separator row and show the count before its description.
    result_end1 = result_end[['品牌-分类', '数量', '检查内容']][1:]
    print(result_end1)

# Folder that holds the exported excel and path-list files.
path = r"D:\FileRecv\新建文件夹"
# Every entry in that folder; the check matches brand/check names against these.
file_names = os.listdir(path)
# Categories to compare.
checks = ['doc']
# Brands to compare.
brands = ['INFINEON']

check_consistency_imgdoc()