一个文件夹下有许多Excel文件和CSV文件,对这些文件进行扫描统计其中的数据,最后将数据写入CSV文件;查找条件存在一个CSV文件中
Excel文件或者CSV文件每一行第一列在下面列表里
1234
2345
4567
6789
7890
import csv
import glob,os,sys
from datetime import datetime,date
from xlrd import open_workbook,xldate_as_tuple
item_number_file='item_numbers_to_find.csv'
path_to_folder='file_archive'
output_file='output_file\output.csv'
item_number_to_find=[]
with open(item_number_file,'r',newline='') as item_number_csv_file:
filereader=csv.reader(item_number_csv_file)
for row in filereader:
item_number_to_find.append(row[0])
print(item_number_to_find)
filewriter=csv.writer(open(output_file,'a',newline=''))
file_counter=0
line_counter=0
count_of_item_number=0
for input_file in glob.glob(os.path.join(path_to_folder,'*.*')):
file_counter+=1
print(input_file)
if input_file.split('.')[1]=='csv':
with open(input_file,'r',newline='') as csv_in_file:
filereader=csv.reader(csv_in_file)
header=next(filereader)
for row in filereader:
row_of_output=[]
for column in range(len(header)):
if column==3:
cell_value=str(row[column]).lstrip('$').replace(',','').strip()
row_of_output.append(cell_value)
else:
cell_value=str(row[column]).strip()
row_of_output.append(cell_value)
row_of_output.append(os.path.basename(input_file))
if row[0] in item_number_to_find:
filewriter.writerow(row_of_output)
count_of_item_number+=1
line_counter+=1
elif input_file.split('.')[1] =='xls' or 'xlsx':
workbook=open_workbook(input_file)
for worksheet in workbook.sheets():
try:
header=worksheet.row_values(0)
except IndexError:
pass
for row in range(1,worksheet.nrows):
row_of_output=[]
for column in range(len(header)):
if worksheet.cell_type(row,column) ==3:
cell_value=xldate_as_tuple(worksheet.cell(row,column).value,workbook.datemode)
cell_value=str(date(*cell_value[0:3])).strip()
row_of_output.append(cell_value)
else:
cell_value=str(worksheet.cell(row,column).value).strip()
row_of_output.append(cell_value)
row_of_output.append(os.path.basename(input_file))
row_of_output.append(worksheet.name)
if str(worksheet.cell(row,0).value).split('.')[0].strip() in item_number_to_find:
filewriter.writerow(row_of_output)
count_of_item_number+=1
line_counter+=1
print('number of files:',file_counter)
print('number of lines:',line_counter)
print('number of item numbers:',count_of_item_number)