# I wrote this version by hand because read_fwf and the other conversion
# methods could not read my .rpt files.
#
# For every name in file_names, the file INPUT_PATH/<name> is parsed as a
# fixed-width text table and written to PROCCESSED_PATH as a CSV whose name
# is the input name with '.rpt' replaced by '.csv'.
#   * Line 2 of the file is treated as the column ruler: it is split on
#     single spaces and the length of each piece gives a column width.
#   * The first kept line supplies the column headers; every later kept line
#     becomes one data row.
for file_name in file_names:
    print('Starting File:', file_name)
    rows = []
    with open(os.path.join(INPUT_PATH, file_name), 'r', encoding="utf8") as file:
        first_line = file.readline()
        ruler_line = file.readline()
        if not ruler_line:
            # Without a second line there is no ruler to derive widths from.
            # Skip explicitly instead of failing with NameError or silently
            # reusing the widths computed for the previous file.
            print('Skipping File (fewer than two lines):', file_name)
            continue

        # Distance from the start of one column to the start of the next:
        # piece length minus one leading '-' plus 2, exactly as the slicing
        # below has always consumed it.
        steps = [
            len(re.sub(r'^-', '', piece)) + 2
            for piece in ruler_line.split(' ')
        ]

        # Only the very first line has its non-ASCII characters removed.
        first_line = re.sub(r'[^\x00-\x7F]+', '', first_line)

        # Walk the two lines already consumed, then the rest of the file,
        # so the file is read only once.
        for chunk in ((first_line, ruler_line), file):
            for line in chunk:
                # NOTE(review): `line[0:3] != ' -'` compares a 3-character
                # slice with a 2-character literal, so as written it cannot
                # reject any line that passes the length check -- confirm
                # the intended literal.
                keep = (
                    line[0:1] not in ('Ã', 'á')
                    and line[0:3] != ' -'
                    and len(line.strip()) > 3
                    and not line.startswith('Completion time:')
                )
                if not keep:
                    continue
                cells = []
                start = 0
                for step in steps:
                    cells.append(line[start:start + step].strip())
                    start += step
                rows.append(cells)

    if not rows:
        # No line survived the filter, so there is no header row to use.
        print('Skipping File (no usable rows):', file_name)
        continue

    # A header cell keeps only the text after its last '|'.
    headers = [cell.split('|')[-1] for cell in rows[0]]
    df = pd.DataFrame(rows[1:], columns=headers)
    new_name = file_name.replace('.rpt', '.csv')
    df.to_csv(os.path.join(PROCCESSED_PATH, new_name), index=False)
    print('Outputted File:', new_name)