上一篇将 xlsx 转换为 csv 的方法中,主要耗时在于使用 xlrd 读取文件。之后接触到了强大的 pandas,改用它读取后,文件转换时间大大缩短。
def xls2csv3(file):
try:
xls = pd.read_excel(file, header=0)
except FileNotFoundError:
msg = file + ' does not exist'
print(msg)
else:
# loc 获取指定行的值
rows = xls.loc[1]
print(rows)
col_need = []
for i in range(len(rows)):
if 'int' in str(rows[i]):
col_need.append(i)
# print(sheet1.col_values(i))
# print(rows)
# print(cols)
out_file = file[:file.find('.')] + '_2.csv'
# read_excel returns a DataFrame
data = pd.read_excel(file, usecols=col_need)
header = ['#v100']
for i in range(len(col_need)-1):
header.append("")
data.columns = header
data.to_csv(out_file, encoding='gbk', index=