pandas读取文件
# Time how long pandas takes to load a single column from the first file.
# NOTE(review): `root`, `files`, `data` and `app_data` are not defined in this
# fragment; it presumably runs inside an `os.walk(...)` loop -- confirm.
starts = time.time()
for file in files[:1]:
    print(file)
    file_paths = os.path.join(root, file)
    print(file_paths)
    # Load the whole CSV, then keep only the column labelled `data`.
    # NOTE: engine='python' selects pandas' pure-Python parser; the pandas
    # docs describe the default C engine as faster, so this timing is not
    # pandas' best case.
    df_y = pd.read_csv(file_paths, engine='python')[data]
# NOTE(review): `df_y` is never stored in `app_data`, so this prints
# `app_data` exactly as it was before the loop -- confirm whether
# `app_data[file] = df_y` was intended.
print(app_data)
end = time.time()
# Report the measurement; the original computed `end` but never used it.
print('elapsed seconds:', end - starts)
python open读取文件
# Position of the comma-separated field to extract from every row.
line = int(data)
# There are many x files under `train_file_x`.
for root, dirs, files in os.walk(train_file_x):
    # Use only the first file of each visited directory as an example.
    app_data = pd.DataFrame()
    starts = time.time()
    for file in files[:1]:
        print(file)
        file_paths = os.path.join(root, file)
        print(file_paths)
        with open(file_paths, 'r') as f:
            # Skip the first row explicitly. The original nested two loops
            # over the same file handle, which dropped the first row as an
            # accidental side effect; `next` makes that intent visible and
            # is a no-op on an empty file.
            next(f, None)
            # Strip the line terminator before splitting so the value is
            # clean even when the requested field is the last one in a row.
            list2 = [row.rstrip('\n').split(',')[line] for row in f]
        # One DataFrame column per file, keyed by the file name.
        app_data[file] = list2
    print(app_data)
    end = time.time()
    # Report the measurement; the original recorded `starts` but never
    # computed the elapsed time.
    print('elapsed seconds:', end - starts)
对比结果:
pandas 和 Python 内置的 open 读取同样的文件,耗时相差 5 倍(open 方式更快)。
所以,提高效率的办法就是用 Python 内置的 open 读取 csv 文件;如果之后需要用 DataFrame,再把结果转成 DataFrame。