- 问题描述:
- 1、大批量数据表,字段为:shop_name、site、sku、purchase_date、quantity
- 2、日期不连贯,且有多个sku对应的日期是缺失的。
方法一:pandas、切片
def _trailing_quantity_sum(frame, window_days):
    """Sum ``quantity`` over a trailing window for every row of *frame*.

    For each row, the result is the total ``quantity`` of all rows that share
    the same ``shop_name``, ``site`` and ``sku`` and whose ``purchase_date``
    lies no later than the row's own date and less than *window_days* days
    before it. Rows dated the same day as the current row are included, and
    gaps in the date sequence simply contribute nothing.

    Args:
        frame: DataFrame with columns ``shop_name``, ``site``, ``sku``,
            ``purchase_date`` (already converted to datetime) and
            ``quantity``. Its index must be unique, because results are
            written back by index label.
        window_days: Length of the trailing window in days (e.g. 7 or 15).

    Returns:
        A float Series aligned to ``frame.index``. Rows that match nothing
        (missing key or missing date) keep the initial value 0.0.
    """
    totals = pd.Series(0.0, index=frame.index)
    zero = pd.Timedelta(0)
    window = pd.Timedelta(days=window_days)
    # Grouping once replaces three full-frame equality masks per row; each
    # row is then compared only with rows of its own shop/site/SKU.
    # sort=False: group order is irrelevant, and it avoids sorting the keys.
    for _, group in frame.groupby(['shop_name', 'site', 'sku'], sort=False):
        dates = group['purchase_date']
        quantities = group['quantity']
        for row_label, day in dates.items():
            if pd.isna(day):
                # No date means no window can be formed; leave the total at 0.
                continue
            age = day - dates
            # Same test as 0 <= age.days < window_days, without a per-row
            # Python-level apply.
            in_window = (age >= zero) & (age < window)
            totals.loc[row_label] = quantities[in_window].sum()
    return totals


test1 = order_data.reset_index()
# Parse the dates once up front instead of on every loop iteration.
test1['purchase_date'] = pd.to_datetime(test1['purchase_date'], format='%Y-%m-%d')
# Sales total over the last 7 days
test1['day7_sum'] = _trailing_quantity_sum(test1, 7)
# Sales total over the last 15 days
test1['day15_sum'] = _trailing_quantity_sum(test1, 15)
#近30天销量和