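"""Compare the number of img and doc files on 88 and 59.

Prerequisite: notify ** in advance to export the img and doc file paths of the
'brand' on 88, and to export that brand's doc and img records from mongo.
"""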
import os
import pandas as pd
def file_folder(file, sep='/', max_sep=5):
    """Count the paths that have intermediate folders between brand and file name.

    A path is counted when it contains more than ``max_sep`` occurrences of
    ``sep``.  The defaults reproduce the original fixed rule (more than five
    ``'/'`` characters).
    NOTE(review): the threshold presumably reflects the expected
    ``.../<brand>/<file>`` depth on 88 -- confirm against the real listing.

    Args:
        file: DataFrame whose first column holds the file paths.
        sep: Path separator to count; pass ``'\\'`` for Windows-style listings.
        max_sep: Largest separator count that is still considered "flat".

    Returns:
        Number of rows in the first column that exceed ``max_sep`` separators.
    """
    # Non-string cells (e.g. NaN from a blank/NA line) cannot be paths, so they
    # are skipped instead of crashing on ``.count``.
    return sum(
        1
        for entry in file.iloc[:, 0]
        if isinstance(entry, str) and entry.count(sep) > max_sep
    )
def _load_brand_inputs(brand, check):
    """Load the 59 export (Excel) and the 88 path listing for one brand/check pair."""
    data_xlsx = None
    data_txts = None
    for file_name in file_names:
        # Names containing '~$' are skipped (presumably Office lock/temp files).
        if brand not in file_name or check not in file_name or '~$' in file_name:
            continue
        file_path = os.path.join(path, file_name)
        if '.xlsx' in file_name:
            data_xlsx = pd.read_excel(file_path)  # 59 export
        else:
            data_txts = pd.read_table(file_path, header=None)  # 88 path listing
    if data_xlsx is None or data_txts is None:
        # Fail loudly instead of hitting a NameError or silently reusing the
        # data loaded for a previous brand/check pair.
        missing = '59 .xlsx export' if data_xlsx is None else '88 path listing'
        raise FileNotFoundError(
            f'No {missing} found in {path!r} for brand {brand!r}, check {check!r}'
        )
    return data_xlsx, data_txts


def check_consistency_imgdoc():
    """Compare the file names recorded on 59 with the files present on 88.

    Reads the module-level globals ``brands``, ``checks``, ``file_names`` and
    ``path``.  For every brand/check pair it loads the matching Excel export
    (59) and path listing (88), computes totals, de-duplicated totals and the
    mismatches between both sides (exact and case-insensitive), and prints the
    collected rows as a DataFrame.

    Raises:
        FileNotFoundError: if either input file for a brand/check pair is missing.
    """
    result = []
    for brand in brands:
        for check in checks:
            # Blank spacer row in front of every brand/check section.
            result.append((' ', '', ' '))
            data_xlsx, data_txts = _load_brand_inputs(brand, check)
            label = f'{brand} - {check}'
            list_name = check + '_name'

            def add_row(description, value, label=label):
                result.append((label, description, value))

            # --- 1: 59 side -------------------------------------------------
            orig_59 = data_xlsx[list_name]
            len_orig_59 = orig_59.shape[0]
            # Missing cells are not file names; drop them before de-duplicating.
            name_59 = set(orig_59.dropna())
            len_name_59 = len(name_59)
            downloaded = data_xlsx['is_download'] == 1
            not_downloaded = data_xlsx['is_download'] == 0
            down_file_59_reduce = set(orig_59[downloaded].dropna())
            len_down_file_59_reduce = len(down_file_59_reduce)
            isnot_download = set(orig_59[not_downloaded].dropna())
            isnot_download_number = int(not_downloaded.sum())
            is_notdownload_number = len(isnot_download)
            # Guard against an empty export (would otherwise divide by zero).
            ratio = is_notdownload_number / len_name_59 if len_name_59 else 0.0
            percent = format(ratio, '.2%')

            # --- 2: 88 side -------------------------------------------------
            paths_88 = data_txts.iloc[:, 0].dropna().astype(str)
            # Depending on how the listing was exported, the separator may need
            # to be '/' instead of '\'.
            orig_88 = [entry.split('\\')[-1] for entry in paths_88]
            len_orig_88 = len(orig_88)
            name_88 = set(orig_88)
            len_name_88 = len(name_88)
            file_folders = file_folder(data_txts)

            # --- 3: comparison ---------------------------------------------
            # 3-1: downloaded on 59 but not present on 88.
            x = len(down_file_59_reduce - name_88)
            # 3-2: present on 88 but unknown to 59.
            y = len(name_88 - name_59)
            # 3-3: not downloaded on 59 yet present on 88.
            z = len(isnot_download & name_88)
            # 3-4: case-insensitive match of downloaded 59 names against 88.
            upper_name_88 = {name.upper() for name in name_88}
            s = sum(
                1
                for name in down_file_59_reduce
                if isinstance(name, str) and name.upper() in upper_name_88
            )
            # Matches gained only by ignoring case (all matches minus exact ones).
            change_upper_solve_num = s - (len_down_file_59_reduce - x)

            # --- results: 59 part ------------------------------------------
            add_row(f'59-{check}总数', len_orig_59)
            add_row(f'59-{check}去重数量', len_name_59)
            add_row(f'59-未下载{check}数量(is_download)', isnot_download_number)
            add_row(f'59-未下载{check}去重数量(is_download)', is_notdownload_number)
            add_row(f'59-未下载{check}百分比', percent)
            # --- results: 88 part ------------------------------------------
            add_row(f'88-{check}总数', len_orig_88)
            add_row(f'88-{check}去重数量(不必要)', len_name_88)
            add_row(f'88-{check}的文件夹数(需处理)', file_folders)
            # --- results: comparison ---------------------------------------
            if x:
                add_row(f'59中已下载的{check}不在88上的数量', x)
            else:
                add_row(f'59已下载{check}全部存在', '')
            if y:
                add_row(f'88中无效{check}数量(修改大小写之后数目能减少)', y)
            else:
                add_row(f'88中没有无效{check}', '')
            if z:
                add_row(f'59中未下载{check}在88上的数量', z)
            else:
                add_row(f'59中未下载{check}也不在88上', '')
            if change_upper_solve_num:
                add_row(f'59中已下载{check}转换大小写就能在88里找到的总数', s)
                add_row(f'转换大小写之后就能找到的文件数(新增)', change_upper_solve_num)
                add_row(f'59中已下载的文件转换大小写还没找到的文件名去重', x - change_upper_solve_num)
                add_row(f'59中实际缺少的文件去重(已下载未上传+未下载未上传)', x - change_upper_solve_num + z)
            else:
                add_row(f'88{check}区分大小写也不管用', '')

    result_end = pd.DataFrame(result, columns=['品牌-分类','检查内容','数量'])
    # Reorder the columns and drop the leading spacer row before printing.
    result_end1 = result_end[['品牌-分类','数量','检查内容']][1:]
    print(result_end1)
# Run configuration. These module-level names are read directly by
# check_consistency_imgdoc(), so they must be defined before it is called.
# Brand names; each is matched as a substring of the input file names.
brands = ['INFINEON']
# Check types; each is matched as a substring of the input file names and
# also selects the '<check>_name' column of the Excel export.
checks = ['doc']
path = r"D:\FileRecv\新建文件夹" # folder that holds the input files
file_names = os.listdir(path)
# Run the comparison and print the result table.
check_consistency_imgdoc()