python时间序列 案例_python pandas 对时间序列文件处理的实例

import pandas as pd

from numpy import *

import matplotlib.pylab as plt

import copy

def read(filename):
    """Count the rows of shop 855 per calendar day and save them to 855_pay.csv.

    The CSV is read in chunks of 1,000,000 rows so that large files do not
    have to fit in memory.  Column ``B`` is compared against 855 and column
    ``C`` is parsed as a timestamp.  The output has the day in column ``C``
    and the number of matching rows in column ``D``.

    Args:
        filename: Path of the CSV file; it must have columns ``B`` and ``C``.

    Returns:
        None.  Nothing is written when no row matches.
    """
    chunk_size = 1000000
    daily_parts = []

    # Passing chunksize makes read_csv yield DataFrames one chunk at a time.
    for chunk in pd.read_csv(filename, chunksize=chunk_size):
        stamps = pd.to_datetime(chunk.loc[chunk['B'] == 855, 'C'])
        if len(stamps) == 0:
            continue
        # One entry per matching row, indexed by its timestamp.
        ones = pd.Series(1, index=pd.DatetimeIndex(stamps, name='C'), name='D')
        daily_parts.append(ones.resample('D').sum())

    print("Iteration is stopped.")

    if not daily_parts:
        return

    # A day can straddle two chunks, so merge the partial counts per day and
    # then resample once more to fill day gaps between chunks.
    total = pd.concat(daily_parts).groupby(level=0).sum()
    total = total.resample('D').sum().fillna(0)
    total.to_csv('855_pay.csv')

def read2(filename):
    """Load a CSV file piece by piece and return it as a single DataFrame.

    Rows are pulled 100000 at a time through the reader's ``get_chunk``
    method.  When the reader raises ``StopIteration`` a notice is printed,
    and the collected pieces are concatenated with a fresh 0..n-1 index.
    """
    rows_per_piece = 100000
    csv_reader = pd.read_csv(filename, iterator=True)
    pieces = []

    while True:
        try:
            pieces.append(csv_reader.get_chunk(rows_per_piece))
        except StopIteration:
            # The reader is exhausted: report it and leave the loop.
            print ("Iteration is stopped.")
            break

    return pd.concat(pieces, ignore_index=True)

def read3save(filename):
    """Append the per-day row count of shop 855 from one CSV to 855_pay.csv.

    Column ``B`` is compared against 855 and column ``C`` is parsed as a
    timestamp.  The result (day in ``C``, count in ``D``) is appended to
    ``855_pay.csv`` so the function can be called once per input file.

    Args:
        filename: Path of the CSV file; it must have columns ``B`` and ``C``.

    Returns:
        None.  Nothing is written when no row matches.
    """
    import os  # function-scope import: the file's import block has no os

    dat = pd.read_csv(filename)
    rows = dat.loc[dat['B'] == 855, 'B':'C']  # column slice + row filter
    print(shape(rows))
    if len(rows) == 0:
        return

    # Build a fresh series instead of assigning into a filtered slice.
    stamps = pd.DatetimeIndex(pd.to_datetime(rows['C']), name='C')
    daily = pd.Series(1, index=stamps, name='D')
    daily = daily.resample('D').sum().fillna(0)  # keep the filled result

    out_path = '855_pay.csv'
    # Only the first write gets a header; repeating it on every append would
    # leave header lines in the middle of the file.
    write_header = (not os.path.exists(out_path)
                    or os.path.getsize(out_path) == 0)
    daily.to_csv(out_path, mode='a', header=write_header)

def loadDataSet(fileName, delim='\t'):
    """Read a delimited text file of numbers into a numpy matrix.

    Each non-blank line is stripped, split on ``delim`` and every field is
    converted with ``float``.

    Args:
        fileName: Path of the text file.
        delim: Field separator, a tab by default.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # The with-block closes the file even when a conversion fails.
    with open(fileName) as fr:
        datArr = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()  # blank lines would otherwise hit float('')
        ]
    # asmatrix is used because the mat alias was removed in NumPy 2.0.
    return asmatrix(datArr)

def getShopData(shop_file='shopInfo.txt', pay_pattern="user_pay.001.00%d",
                out_pattern=r'D:\python\data\%s_pay.csv'):
    """Write per-day row counts for every listed shop from eight CSV parts.

    Shop ids are read from ``shop_file``, one per line.  For each of the
    eight input files ``pay_pattern % 1`` .. ``pay_pattern % 8`` and for each
    shop id, the rows whose column ``B`` equals the id are counted per day
    (column ``C`` parsed as a timestamp) and appended to
    ``out_pattern % shop_id`` with the day in ``C`` and the count in ``D``.

    Args:
        shop_file: Text file with one integer shop id per line.
        pay_pattern: ``%d`` pattern giving the eight input CSV paths.
        out_pattern: ``%s`` pattern giving the output CSV path per shop.

    Returns:
        None.
    """
    import os  # function-scope import: the file's import block has no os

    # Keep the raw token for the file name; blank lines are ignored.
    with open(shop_file) as fr:
        shop_tokens = [line.strip() for line in fr if line.strip()]

    for i in range(1, 9):
        dat = pd.read_csv(pay_pattern % i)
        records = dat.loc[:, 'B':'C']  # column slice
        del dat

        for token in shop_tokens:
            # Always filter the full file.  Rebinding the shared frame here
            # would make every shop after the first see already-reduced data.
            rows = records[records['B'] == int(token)]
            print(shape(rows))
            if len(rows) == 0:
                continue

            stamps = pd.DatetimeIndex(pd.to_datetime(rows['C']), name='C')
            daily = pd.Series(1, index=stamps, name='D')
            daily = daily.resample('D').sum().fillna(0)

            savename = out_pattern % token
            # Header only on the first write to each output file.
            write_header = (not os.path.exists(savename)
                            or os.path.getsize(savename) == 0)
            daily.to_csv(savename, mode='a', header=write_header)

    print("over")

def tset(filename, savename=r'D:\python\data\my_pay.csv'):
    """Append the per-day row count of shop 855 from one CSV to ``savename``.

    Column ``B`` is compared against 855 and column ``C`` is parsed as a
    timestamp.  The output has the day in ``C`` and the count in ``D``.

    Args:
        filename: Path of the CSV file; it must have columns ``B`` and ``C``.
        savename: Output CSV path.  The default is the path the function
            always used before this parameter existed.

    Returns:
        None.  Nothing is written when no row matches.
    """
    import os  # function-scope import: the file's import block has no os

    dat = pd.read_csv(filename)
    rows = dat.loc[dat['B'] == 855, 'B':'C']  # column slice + row filter
    print(shape(rows))
    if len(rows) == 0:
        return

    # Build a fresh series instead of assigning into a filtered slice.
    stamps = pd.DatetimeIndex(pd.to_datetime(rows['C']), name='C')
    daily = pd.Series(1, index=stamps, name='D')
    daily = daily.resample('D').sum().fillna(0)  # keep the filled result

    # Header only on the first write so appended runs stay parseable.
    write_header = (not os.path.exists(savename)
                    or os.path.getsize(savename) == 0)
    daily.to_csv(savename, mode='a', header=write_header)

def getShopData2(filename, out_pattern=r'D:\python\data2\%s_pay.csv',
                 shop_ids=None):
    """Split one CSV into per-shop files of per-day row counts.

    Column ``B`` holds the shop id and column ``C`` is parsed as a
    timestamp.  For every id in ``shop_ids`` that occurs in the file, the
    rows are counted per day and appended to ``out_pattern % id`` with the
    day in ``C`` and the count in ``D``.

    Args:
        filename: Path of the CSV file; it must have columns ``B`` and ``C``.
        out_pattern: ``%s`` pattern giving the output CSV path per shop.
        shop_ids: Iterable of shop ids to export; defaults to 1..2000.

    Returns:
        None.
    """
    import os  # function-scope import: the file's import block has no os

    if shop_ids is None:
        shop_ids = range(1, 2001)

    dat = pd.read_csv(filename)
    stamps = pd.DatetimeIndex(pd.to_datetime(dat['C']), name='C')
    data = pd.DataFrame({'B': dat['B'].values, 'D': 1}, index=stamps)

    # Group once instead of copying and scanning the frame for every id.
    by_shop = data.groupby('B')['D']
    present = by_shop.groups

    for i in shop_ids:
        print(i)
        if i not in present:
            continue

        d = by_shop.get_group(i).resample('D').sum().fillna(0)

        savename = out_pattern % str(i)
        # to_csv writes the C,D header itself; emit it only for a new file
        # so repeated calls do not leave header lines between data rows.
        write_header = (not os.path.exists(savename)
                        or os.path.getsize(savename) == 0)
        d.to_csv(savename, mode='a', header=write_header)

    print("over")

def formatData(in_pattern=r'D:\python\data2\%s_pay.csv',
               out_pattern=r'D:\python\data3\%s_pay.csv', shop_ids=None):
    """Re-aggregate per-shop CSV files so each day appears exactly once.

    For every id in ``shop_ids`` the file ``in_pattern % id`` (columns ``C``
    = day, ``D`` = count) is read, the counts are summed per calendar day,
    and the result is written to ``out_pattern % id``, replacing any
    existing file.

    Args:
        in_pattern: ``%s`` pattern giving the input CSV path per shop.
        out_pattern: ``%s`` pattern giving the output CSV path per shop.
        shop_ids: Iterable of shop ids to process; defaults to 1..2000.

    Returns:
        None.
    """
    import os  # function-scope import: the file's import block has no os

    if shop_ids is None:
        shop_ids = range(1, 2001)

    for i in shop_ids:
        s = str(i)
        print(s)

        name = in_pattern % s
        # Not every shop id necessarily has an input file; skip those.
        if not os.path.exists(name):
            continue

        raw = pd.read_csv(name)
        # Files built by repeated appends may contain extra "C,D" header
        # lines between the data rows; drop them before parsing.
        raw = raw[raw['C'] != 'C']
        if len(raw) == 0:
            continue

        stamps = pd.DatetimeIndex(pd.to_datetime(raw['C']), name='C')
        data = pd.DataFrame({'D': pd.to_numeric(raw['D']).values},
                            index=stamps)
        data = data.resample('D').sum().fillna(0)  # keep the filled result

        data.to_csv(out_pattern % s, mode='w')

    print("over")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值