import pandas as pd
import datetime
import warnings
warnings.filterwarnings('ignore')
# Data import
# Survey date: today's date rendered as YYYY-MM-DD.
today = datetime.date.today()
searchdate = today.strftime("%Y-%m-%d")
# Folder that holds the CSV, ending in a separator so file names can be
# appended directly. A raw string literal cannot end in a backslash (the
# final \' escapes the closing quote and leaves the literal unterminated),
# so the trailing separator is added as a normal escaped string.
path = r'C:\Users\wuxian\Desktop' + '\\'
# Count how many lines the CSV contains.
# The file is opened from the same folder (`path`) that the pd.read_csv calls
# use, and lines are counted while streaming so the large file is never held
# in memory all at once.
with open(path + "20200301之前采购数据.csv", encoding="utf-8") as f:
    size = sum(1 for _ in f)
print("CSV line number: %d" % (size))
# Load a small sample for testing and use it to drop the useless fields.
csv_file = path + '20200301之前采购数据.csv'
reader0 = pd.read_csv(csv_file, nrows=1000)
reader0  # notebook-style display; has no effect when run as a script
# Keep only the columns that have no missing value in the 1000-row sample.
sample_without_gaps = reader0.dropna(axis=1)
columns = sample_without_gaps.columns.tolist()
# Passing iterator=True makes read_csv return an iterable TextFileReader
# instead of a DataFrame.
reader = pd.read_csv(
    csv_file,
    usecols=columns,
    chunksize=100000,
    iterator=True,
)
# Method 1: pull chunks explicitly with get_chunk() until the reader is
# exhausted, then stitch them into one DataFrame.
chunksize = 10000000
chunks = []
while True:
    try:
        piece = reader.get_chunk(chunksize)
    except StopIteration:
        # get_chunk signals the end of the file by raising StopIteration.
        print('Iteration is stopped')
        break
    chunks.append(piece)
df = pd.concat(chunks, ignore_index=True)
# Notebook-style inspection; these expressions have no effect in a script.
df.shape
df.tail()
# Method 2: iterate over the chunked reader directly.
# A TextFileReader can only be consumed once, so a fresh one is opened here;
# reusing a reader that has already been read to the end would yield no chunks.
reader = pd.read_csv(path+'20200301之前采购数据.csv',iterator=True,chunksize = 100000,usecols = columns)
mylist = []
for i in reader:
    mylist.append(i)
    print(i.shape)
# Each element of mylist is a DataFrame, so the pieces must be joined with
# pd.concat; pd.DataFrame(mylist) does not concatenate them row-wise.
if mylist:
    finaldf = pd.concat(mylist, ignore_index=True)
else:
    # No chunks were read: fall back to an empty frame with the expected columns.
    finaldf = pd.DataFrame(columns=columns)
# Notebook-style inspection; has no effect when run as a script.
finaldf.shape
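# Article title: "python读取.csv 大文件的解决办法(iterator=true)" - how to read large .csv files in Python using iterator=True.
# (Web-page residue: latest recommended article, published 2022-07-01 14:29:41.)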