由于您的数据不适合 read_fwf 的自动列推断,您可以自己扫描并解析表头。计算出列名和列宽之后,就可以把它们连同文件指针一起传给 read_fwf,此时文件指针已经位于第一行数据处。表头与数据之间的虚线分隔行能很好地指示各列的宽度,所以我用它来计算列宽。代码如下:
import pandas as pd
import re
# Write a small sample file for make_dataframe() to parse.
# A context manager is used so the handle is flushed and closed before the
# file is read back, instead of relying on garbage collection.
# NOTE(review): make_dataframe() looks for a separator line starting with
# ';;-', but the third line below is just ';;' -- the dashes (and the column
# padding) appear to have been lost; confirm against the original sample data.
with open('test.txt', 'w') as _sample_file:
    _sample_file.write("""\
;; Param
;;Node Parameter Time Series Type
;;
80408 FLOW 80408 FLOW
81009 FLOW 81009 FLOW
82309 FLOW 82309 FLOW """)
def make_dataframe(filename):
    """Read a fixed-width file whose header is a block of ';;' lines.

    Every line before the first one starting with ';;-' is treated as a
    header line.  The ';;-' line itself is the separator: each run of dashes
    in it defines one column, whose width is the run length plus one.
    Multi-line column names are joined top to bottom with single spaces.
    The remaining lines are handed to ``pandas.read_fwf`` as data.

    Args:
        filename: Path of the file to read.

    Returns:
        A DataFrame with one column per dash run, or ``None`` (after
        printing an error message) when no separator line is found.
    """
    with open(filename) as fp:
        # Collect header lines until the dashed separator is reached.
        headers = []
        for line in fp:
            if line.startswith(';;-'):
                break
            # Drop the leading two characters (the ';;' marker) and the
            # line ending, then prefix one space.
            # NOTE(review): the original comment said this swap keeps the
            # line length, but one space replaces two characters -- confirm
            # the intended header offset.
            headers.append(' ' + line.rstrip('\n')[2:])
        else:
            # The loop ran off the end of the file without hitting a break.
            print("ERROR: Header separator not found")
            return None

        # `line` is now the separator; each dash run plus one gap character
        # gives one column width.
        field_lens = [len(run) + 1 for run in re.findall(r"\-+", line)]

        # Flatten multi-line column names: take the same slice from every
        # header line and join the non-blank pieces.
        pd_header = []
        start = 0
        for f_len in field_lens:
            pieces = (h[start:start + f_len].strip() for h in headers)
            pd_header.append(' '.join(piece for piece in pieces if piece))
            start += f_len

        # fp is positioned on the first data line, so read_fwf sees data only.
        df = pd.read_fwf(fp, header=None, names=pd_header, widths=field_lens,
                         index_col=False)
    return df
# Parse the sample file written above and display the resulting frame.
df = make_dataframe(filename='test.txt')
print(df)