# Reference links on chunked processing with pandas:
# https://blog.csdn.net/weixin_43790560/article/details/88587123
# https://blog.csdn.net/zcgyq/article/details/83088259
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.types import NVARCHAR,INT,DECIMAL,FLOAT
import re
def divide(df, type, i, out_dir=None):
    """Write one chunk to the CSV file for ``type``, starting it or appending.

    Args:
        df: DataFrame chunk to write.
        type: Tag embedded in the output file name (``data_<type>_.csv``).
            The name shadows the builtin but is kept so that existing
            keyword callers keep working.
        i: Number of chunks already written to this file. ``0`` (re)creates
            the file and writes the header row; any other value appends the
            rows without a header.
        out_dir: Directory to write into. ``None`` (the default) keeps the
            original hard-coded Windows folder.
    """
    file_name = 'data_' + type + '_.csv'
    if out_dir is None:
        # Default location, spelled exactly as before so existing runs
        # keep producing the same path.
        target = 'G:\\test\\' + file_name
    else:
        target = os.path.join(out_dir, file_name)

    # The first chunk overwrites any stale file and emits the header;
    # later chunks are appended as bare rows.
    first_chunk = i == 0
    df.to_csv(
        target,
        mode='w' if first_chunk else 'a',
        index=False,
        header=first_chunk,
    )
if __name__ == '__main__':
    # Splits one large CSV into one file per year, reading it chunk by chunk
    # so the whole input never has to fit in memory.

    # Input CSV path -- fill this in before running.
    path = ''
    # NOTE: chunksize=3 is a placeholder; remember to tune it for real data.
    reader = pd.read_table(path, sep=',', chunksize=3, encoding="utf-8")

    # Chunks written so far for each handled year. A count of 0 tells
    # divide() to start that year's file over and write the header row.
    chunks_written = {str(y): 0 for y in range(2013, 2019)}
    j = 0  # total number of chunks processed, for the progress output

    for chunk in reader:  # each chunk is a DataFrame
        if chunk.empty:
            # Nothing to classify; avoids reading a first row that is absent.
            continue

        # In this data set every row of a chunk is expected to share the same
        # 'time' value, so only the first row is inspected to save time.
        year = str(chunk['time'].iloc[0])[0:4]

        if year in chunks_written:
            # divide() takes (df, type, i) and derives the file name from
            # `type`, so the year is folded into the tag: each year gets its
            # own file and one year's first chunk cannot overwrite another's.
            divide(chunk, 'daily_' + year, chunks_written[year])
            chunks_written[year] += 1
        else:
            # Unhandled year: report its prefix and leave the chunk unwritten.
            print(year[0:3])

        # NOTE(review): the original indentation was lost; the progress
        # counter is assumed to advance once per chunk -- confirm.
        j += 1
        print('--------' + str(j) + '----------')