Python在一个大文件集合中查找一组项目

一个文件夹下有许多Excel文件和CSV文件,对这些文件进行扫描统计其中的数据,最后将数据写入CSV文件;查找条件存在一个CSV文件中
Excel文件或者CSV文件每一行第一列在下面列表里
1234
2345
4567
6789
7890

import csv
import glob,os,sys
from datetime import datetime,date
from xlrd import open_workbook,xldate_as_tuple
item_number_file='item_numbers_to_find.csv'
path_to_folder='file_archive'
output_file='output_file\output.csv'
item_number_to_find=[]
with open(item_number_file,'r',newline='') as item_number_csv_file:
    filereader=csv.reader(item_number_csv_file)
    for row in filereader:
        item_number_to_find.append(row[0])
print(item_number_to_find)
filewriter=csv.writer(open(output_file,'a',newline=''))
file_counter=0
line_counter=0
count_of_item_number=0
for input_file in glob.glob(os.path.join(path_to_folder,'*.*')):
    file_counter+=1
    print(input_file)
    if input_file.split('.')[1]=='csv':
        with open(input_file,'r',newline='') as csv_in_file:
            filereader=csv.reader(csv_in_file)
            header=next(filereader)
            for row in filereader:
                row_of_output=[]
                for column in range(len(header)):
                    if column==3:
                        cell_value=str(row[column]).lstrip('$').replace(',','').strip()
                        row_of_output.append(cell_value)
                    else:
                        cell_value=str(row[column]).strip()
                        row_of_output.append(cell_value)
                row_of_output.append(os.path.basename(input_file))
                if row[0] in item_number_to_find:
                    filewriter.writerow(row_of_output)
                    count_of_item_number+=1
                line_counter+=1
    elif input_file.split('.')[1] =='xls' or 'xlsx':
        workbook=open_workbook(input_file)
        for worksheet in workbook.sheets():
            try:
                header=worksheet.row_values(0)
            except IndexError:
                pass
            for row in range(1,worksheet.nrows):
                row_of_output=[]
                for column in range(len(header)):
                    if worksheet.cell_type(row,column) ==3:
                        cell_value=xldate_as_tuple(worksheet.cell(row,column).value,workbook.datemode)
                        cell_value=str(date(*cell_value[0:3])).strip()
                        row_of_output.append(cell_value)
                    else:
                        cell_value=str(worksheet.cell(row,column).value).strip()
                        row_of_output.append(cell_value)
                row_of_output.append(os.path.basename(input_file))
                row_of_output.append(worksheet.name)
                if str(worksheet.cell(row,0).value).split('.')[0].strip() in item_number_to_find:
                    filewriter.writerow(row_of_output)
                    count_of_item_number+=1
                line_counter+=1
print('number of files:',file_counter)
print('number of lines:',line_counter)
print('number of item numbers:',count_of_item_number)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值