对比了四种方式的速度:
仅用readline逐行读取文件(作为基准),
使用list.append收集数据并一次性合成DataFrame,
逐行创建DataFrame,再用DataFrame.append合并,
使用DataFrame.loc[n] = [...]的方式逐行插入数据。
结论:非常不建议逐行向DataFrame插入数据,
应该直接用list装入所有数据,最后一次性合成DataFrame。
(测试文件大小:6200 KB,43000 行)
import pandas as pd
from datetime import *
def test(path):
start = datetime.now()
with open(path) as f:
all = pd.DataFrame()
while True:
i_line = f.readline()
if i_line == "":
break
if i_line.find("Date") == -1:
new = pd.DataFrame({"Date":i_line.split(",")[0],"value":i_line.split(",")[6]},index=["0"])
all = all.append(new, ignore_index=True)
end = datetime.now()
print(end-start)
return all
def test_loc(path):
start = datetime.now()
with open(path) as f:
all_loc = pd.DataFrame({"Date":[],"value":[]},index=[])
n=0
while True:
i_line = f.readline()
if i_line == "":
break
if i_line.find("Date") == -1:
all_loc.loc[n] = [i_line.split(",")[0], i_line.split(",")[6]]
n+=1
end = datetime.now()
print(end-start)
return all_loc
def test_readline(path):
start = datetime.now()
with open(path) as f:
while True:
i_line = f.readline()
if i_line == "":
break
if i_line.find("Date") == -1:
pass
end = datetime.now()
print(end-start)
def test_list(path):
start = datetime.now()
with open(path) as f:
list_date = []
list_value = []
while True:
i_line = f.readline()
if i_line == "":
break
if i_line.find("Date") == -1:
list_date.append(i_line.split(",")[0])
list_value.append(i_line.split(",")[6])
all_list = pd.DataFrame({"Datetime":list_date, "value":list_value})
end = datetime.now()
print(end-start)
return all_list
if __name__ == "__main__":
    # ``path`` was never assigned anywhere in this script, so the calls below
    # raised NameError; it is now taken from the command line.
    import sys

    if len(sys.argv) != 2:
        sys.exit("usage: python <script> <csv_path>")
    path = sys.argv[1]

    # Each call prints its own elapsed time; the three builders also return
    # the DataFrame they produced.
    test_readline(path)
    df_list = test_list(path)
    df = test(path)
    df_loc = test_loc(path)
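# Recorded timings (listed in the same order as the calls above):
# test_readline: 0.006 s
# test_list:     0.153 s
# test:          1 min 0.52 s
# test_loc:      2 min 40 s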