# Inspect the data (观察数据)
import pandas as pd
# Placeholder: set this to the CSV file you want to inspect.
path = r""

# Read the file lazily in 50-row chunks and show only the head of the first
# chunk, so the file is never loaded into memory as a whole.
# The reader is used as a context manager so the underlying file handle is
# released even though we stop after the first chunk.
with pd.read_csv(path, chunksize=50) as chunks:
    for chunk_i in chunks:
        print(chunk_i.head())
        break
# Filter the data by timestamp (按时间戳筛选数据)
import pandas as pd
# Placeholder: set this to the source CSV file.
datapath = r".csv"

# Column names assigned to every chunk, in file order.
COLUMNS = ['uid', 'itemid', 'catid', 'behavior', 'timestamp']
# Only rows whose timestamp falls in this calendar year are kept.
TARGET_YEAR = 2017
# Stop after this many 500000-row chunks have been processed.
MAX_CHUNKS = 40

# NOTE(review): read_csv treats the first line of the file as a header by
# default. If the source file has no header row, its first record is consumed
# as column names and never reaches the output -- confirm against the data.
# The reader is a context manager so the file handle is closed when the loop
# exits early.
with pd.read_csv(datapath, chunksize=500000) as chunks:
    for i, chunk_i in enumerate(chunks, start=1):
        chunk_i.columns = COLUMNS

        # Parse the epoch-seconds column once and derive every calendar field
        # from that single result.
        # NOTE(review): no timezone conversion is applied, so the derived
        # date/hour are naive UTC values -- confirm that is what is wanted.
        parsed = pd.to_datetime(chunk_i['timestamp'], unit='s')
        chunk_i['date'] = parsed.dt.date
        chunk_i['year'] = parsed.dt.year
        chunk_i['month'] = parsed.dt.month
        chunk_i['hour'] = parsed.dt.hour

        chunk_i = chunk_i[chunk_i['year'] == TARGET_YEAR]

        # Append mode without header or index: each chunk adds rows to the
        # same output file, so re-running the script appends the rows again.
        chunk_i.to_csv(r'PATH', mode='a', encoding='utf-8', header=False, index=False)
        print("已经完成第{}个chunk".format(str(i)))

        if i >= MAX_CHUNKS:
            break