阿里AI天池大赛-资金流入流出预测-基于周期因子的时间序列预测

本文链接：https://blog.csdn.net/chongfa2008/article/details/119398340

1：报名地址

https://tianchi.aliyun.com/competition/entrance/231573/score

2：排名分数

3：模型源码

以下是基于周期因子的基线模型完整源码：

import pandas as pd

# Load the raw user balance table from a CSV file in the current working directory.
data=pd.read_csv('./user_balance_table.csv')
# Bare expression: presumably a leftover notebook-cell display; it has no effect in a plain script.
data

# Add calendar feature columns derived from the report date.
def add_timestamp(data):
    """Parse ``report_date`` and attach calendar feature columns.

    The frame is modified in place and also returned, so both
    ``add_timestamp(df)`` and ``df = add_timestamp(df)`` work.

    Args:
        data: DataFrame with a ``report_date`` column holding either
            ``YYYYMMDD`` values or already-parsed datetimes.

    Returns:
        The same DataFrame with ``report_date`` converted to datetime and the
        columns ``day``, ``month``, ``year``, ``week`` (ISO week of the year)
        and ``weekday`` (Monday=0 ... Sunday=6) added.
    """
    # Convert the YYYYMMDD values into real datetimes.
    data['report_date'] = pd.to_datetime(data['report_date'], format='%Y%m%d')
    dates = data['report_date'].dt

    data['day'] = dates.day
    data['month'] = dates.month
    data['year'] = dates.year

    # ``Series.dt.week`` was removed in pandas 2.0; ``isocalendar().week`` is
    # its replacement. It returns a nullable UInt32 column, so cast back to
    # the dtypes the old accessor produced: int64 when every date is valid,
    # float64 (NaN for missing dates) otherwise.
    iso_week = dates.isocalendar().week
    if iso_week.notna().all():
        data['week'] = iso_week.astype('int64')
    else:
        data['week'] = iso_week.astype('float64')

    data['weekday'] = dates.weekday
    return data
# Example: 2014-08-05 yields day=5 and weekday=1, i.e. Tuesday (Monday is 0).
data=add_timestamp(data)
data

# Number of rows per weekday value.
data['weekday'].value_counts()

def get_total_balance(data,begin):
    """Aggregate purchase and redeem amounts per day from ``begin`` onwards.

    Args:
        data: DataFrame with a datetime ``report_date`` column and the numeric
            columns ``total_purchase_amt`` and ``total_redeem_amt``. It is not
            modified.
        begin: Earliest ``report_date`` to keep (inclusive); anything that can
            be compared against the ``report_date`` column, e.g. '2014-03-01'.

    Returns:
        DataFrame with the columns ``report_date``, ``total_purchase_amt`` and
        ``total_redeem_amt``, one row per date on or after ``begin``. The row
        index is the position in the full daily table, i.e. it is not
        renumbered after filtering.
    """
    # Select the columns with a list: tuple-style selection
    # (``[...]['a', 'b']``) is rejected by pandas 2.0 and later.
    # groupby does not mutate its input, so no defensive copy is needed.
    daily_totals = data.groupby(['report_date'])[['total_purchase_amt', 'total_redeem_amt']].sum()
    # Turn report_date back into an ordinary column.
    daily_totals = daily_totals.reset_index()
    return daily_totals[daily_totals['report_date'] >= begin]


# Keep only the daily totals dated 2014-03-01 or later.
total_balance=get_total_balance(data,'2014-03-01')
total_balance



import numpy as np
import datetime
# Build the placeholder rows for the period that has to be predicted.
def generate_test_data(data, start=datetime.datetime(2014,9,1), end=datetime.datetime(2014,10,1)):
    """Append one all-NaN row per day of the forecast window to ``data``.

    Args:
        data: DataFrame whose first column is the date column; every other
            column is treated as a value column to be predicted. It is not
            modified.
        start: First day of the forecast window (inclusive). Defaults to
            2014-09-01.
        end: Day after the last day of the forecast window (exclusive).
            Defaults to 2014-10-01.

    Returns:
        A new DataFrame: the rows of ``data`` followed by one row per day in
        ``[start, end)`` with the date filled in and NaN in every other
        column. The original row labels are kept; the appended rows are
        labelled 0..n-1, as ``pd.concat`` does without ``ignore_index``.

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    if end < start:
        raise ValueError('end must not be earlier than start')

    # Only whole days are generated; any partial trailing day is dropped.
    n_days = (end - start).days
    future_dates = pd.to_datetime(
        [start + datetime.timedelta(days=offset) for offset in range(n_days)]
    )

    # First column receives the dates, the remaining ones are left unknown.
    date_col, *value_cols = data.columns
    placeholder = pd.DataFrame({date_col: future_dates})
    for col in value_cols:
        placeholder[col] = np.nan

    # concat builds a new frame, so the caller's data stays untouched.
    return pd.concat([data, placeholder], axis=0)

# Append the placeholder rows for the dates to be predicted.
total_balance=generate_test_data(total_balance)
total_balance

# Add the calendar feature columns to total_balance (placeholder rows included).
total_balance=add_timestamp(total_balance)
total_balance

# Keep an untouched snapshot of the prepared table.
temp = total_balance.copy()


# Re-run from here: every run starts again from the snapshot (temp).
total_balance = temp.copy()

# Weekday factor = mean amount on that weekday / mean amount over all days.
# pandas' mean skips NaN, so rows without amounts do not contribute.
weekday_weight = (
    total_balance
    .groupby('weekday', as_index=False)[['total_purchase_amt', 'total_redeem_amt']]
    .mean()
    .rename(columns={
        'total_purchase_amt': 'purchase_weekday',
        'total_redeem_amt': 'redeem_weekday',
    })
)
weekday_weight['purchase_weekday'] = (
    weekday_weight['purchase_weekday'] / total_balance['total_purchase_amt'].mean()
)
weekday_weight['redeem_weekday'] = (
    weekday_weight['redeem_weekday'] / total_balance['total_redeem_amt'].mean()
)
weekday_weight

# Attach each row's weekday factors to the daily table.
total_balance = total_balance.merge(weekday_weight, on='weekday', how='left')
total_balance

# Date factor, step 1: how often each (day-of-month, weekday) pair occurs.
# The count ends up in the 'report_date' column.
weekday_count = (
    total_balance
    .groupby(['day', 'weekday'], as_index=False)[['report_date']]
    .count()
)
weekday_count = weekday_count.merge(weekday_weight, on='weekday')
weekday_count

# Step 2: weight = weekday factor * occurrence count / number of distinct months.
# NOTE(review): the counts and the month divisor are taken from every row of
# total_balance; if the frame still contains the not-yet-predicted placeholder
# rows, they are counted too - confirm this is intended.
month_total = len(np.unique(total_balance['month']))
for factor_col in ('purchase_weekday', 'redeem_weekday'):
    weekday_count[factor_col] = (
        weekday_count[factor_col] * weekday_count['report_date'] / month_total
    )
weekday_count

# Step 3: sum the weights per day-of-month; this is the average weekday
# effect that falls on each calendar day.
day_rate = (
    weekday_count
    .groupby('day', as_index=False)[['purchase_weekday', 'redeem_weekday']]
    .sum()
)
day_rate


# Mean amounts per day-of-month (mean for the 1st, for the 2nd, ...).
day_mean = (
    total_balance
    .groupby('day', as_index=False)[['total_purchase_amt', 'total_redeem_amt']]
    .mean()
)
day_mean


# Divide out the average weekday effect to obtain the per-day base amounts.
day_base = day_mean.merge(day_rate, on='day', how='left')
day_base['total_purchase_amt'] = day_base['total_purchase_amt'] / day_base['purchase_weekday']
day_base['total_redeem_amt'] = day_base['total_redeem_amt'] / day_base['redeem_weekday']
day_base

# Map every day-of-month to its date in September 2014. Built in one
# vectorised call so the column is a proper datetime column regardless of how
# pandas infers dtypes for cell-by-cell assignment. September has no 31st, so
# day 31 is coerced to NaT (the row-by-row version likewise left it unset).
day_base['report_date'] = pd.to_datetime(
    '2014-09-' + day_base['day'].astype(int).astype(str).str.zfill(2),
    format='%Y-%m-%d',
    errors='coerce',
)

day_base

# Weekday of each prediction date (Monday=0); needed to look up the weekday
# factor, since prediction = base * weekday factor.
day_base['weekday'] = day_base['report_date'].dt.weekday
day_base

# Look up the weekday factors for every prediction date. The merge is an
# inner join, so rows whose weekday has no match in weekday_weight are dropped.
day_pred = pd.merge(
    day_base[['day', 'total_purchase_amt', 'total_redeem_amt', 'report_date', 'weekday']],
    weekday_weight,
    on='weekday',
)
day_pred


# Prediction = per-day base amount * weekday factor.
day_pred['total_purchase_amt'] = day_pred['total_purchase_amt'] * day_pred['purchase_weekday']
day_pred['total_redeem_amt'] = day_pred['total_redeem_amt'] * day_pred['redeem_weekday']
day_pred

# Chronological order, keeping only the three output columns.
day_pred = day_pred.sort_values('report_date')
day_pred = day_pred[['report_date', 'total_purchase_amt', 'total_redeem_amt']]
day_pred

# Render the dates as YYYYMMDD strings (no dashes).
day_pred['report_date'] = day_pred['report_date'].dt.strftime('%Y%m%d')
day_pred

# Write the predictions as CSV without index and without a header row.
day_pred.to_csv('Baseline131.csv', index=False, header=None)