【资金流入流出预测】 数据探索

import pandas as  pd
import numpy as np
import warnings 
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['KaiTi'] # 指定默认字体
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题

字段了解

用户信息表: user_profile_table 。

我们总共随机抽取了约 3 万用户,其中部分用户在 2014 年 9 月份第一次出现,这部分用户只在测试数据中 。因此用户信息表是约 2.8 万 个用户的基本数据,在原始数据的基础上处理后,主要包含了用户的性别、城市和星座。具体的字段如下表 1 
user_p = pd.read_csv('user_profile_table.csv',index_col='user_id')
user_p  # 用户信息 性别 城市(数字编码) 星座 
user_p.shape 
(28041, 3)
user_b = pd.read_csv('user_balance_table.csv',index_col='user_id')

户申购赎回数据表:user_balance_table 。

里面有 20130701 至 20140831 申购和赎回信息、以及所有的子类目信息, 数据经过脱敏处理。脱敏之后的数据,基本保持了原数据趋势。数据主要包括用户操作时间和操作记录,其中操作记录包括申购和赎回两个部分。金额的单位是分,即 0.01 元人民币。 如果用户今日消费总量为0,即consume_amt=0,则四个字类目为空。

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-tF8sc2By-1597837748914)(attachment:image.png)]

user_b.isna().sum()  # balance记录了 每个用户每一天的交易记录 
user_b.fillna(0,inplace=True)  # 缺失值填充

收益表为余额宝在 14 个月内的收益率表:mfd_day_share_interest 。

mfd_7daily_yield 七日年化收益率是货币基金过去七天每万份基金份额净收益折合成的年收益率。
mfd_daily_yield 万份收益就是指基金公司通常日公布当日每万份基金单位实现的收益金额。
interest = pd.read_csv('mfd_day_share_interest.csv')

上海银行间同业拆放利率(Shibor)表

银行间拆借利率表是 14 个月期间银行之间的拆借利率(皆为年化利率): mfd_bank_shibor 。具体字段如下表格 4 所示
shibor = pd.read_csv('mfd_bank_shibor.csv')

数据观察

user_p

没有足够信息 可能需要结合其他表格查看
user_p.head()
sexcityconstellation
user_id
216411949狮子座
1216412149摩羯座
2216411949双子座
2316411949双鱼座
2516481949双鱼座
user_p['sex'].value_counts()  # 男女分布均衡
1    14489
0    13552
Name: sex, dtype: int64

user_b 总体趋势

user_b['date'] = pd.to_datetime(user_b['report_date'], format= "%Y%m%d")
total_b = user_b.groupby(by='date').sum()
total_b.head()
report_datetBalanceyBalancetotal_purchase_amtdirect_purchase_amtpurchase_bal_amtpurchase_bank_amttotal_redeem_amtconsume_amttransfer_amttftobal_amttftocard_amtshare_amtcategory1category2category3category4
date
2013-07-018877639141252327861225364535324883483245878621250722112080645525022382160514286251428620295620.089350.00.0292810.0
2013-07-029662736960278810703252327861290373902900751016561183124463272554548350066220448222044820298800.01650.00.0348416.0
2013-07-031004522079730012760627881070327270770272341562049366067404965953867454406549946154994610366140.069764.00.0384642.0
2013-07-0410528358192312038062300127606183211851828223012108178617405264107295353915875338587533803895525078.0107500.00.0402813.0
2013-07-05109511035203209232243120380621164874911607991693043146775602763587697926206566120656610407580.051100.00.0646826.0
plt.figure(figsize=(20,5))
for i in ['total_purchase_amt','total_redeem_amt']:
    plt.plot(total_b[i].index,total_b[i].values,label=f'{i}')
    plt.legend()
    
# 观察在14-1至14-4之间有着较大波动  四月之后开始平缓

在这里插入图片描述

plt.figure(figsize=(20,5))
for i in ['total_purchase_amt','total_redeem_amt']:
    plt.plot(total_b[i][total_b[i].index >pd.to_datetime('2014-4-1')].index,
             total_b[i][total_b[i].index >pd.to_datetime('2014-4-1')].values,label=f'{i}')
    plt.legend()

在这里插入图片描述

user_b 工作日和周末

total_b.reset_index(inplace=True)
total_b['weekday'] = total_b['date'].dt.weekday
tb_weekday_mean = total_b[['weekday','total_purchase_amt','total_redeem_amt']].groupby(by='weekday').mean()
plt.figure(figsize=(20,15))
plt.subplot(3,2,1)
sns.violinplot(x='weekday', y='total_purchase_amt', data = total_b)
plt.subplot(3,2,2)
sns.violinplot(x='weekday', y='total_redeem_amt', data = total_b)
plt.subplot(3,2,3)
sns.boxplot(x='weekday', y='total_purchase_amt', data = total_b)
plt.subplot(3,2,4)
sns.boxplot(x='weekday', y='total_redeem_amt', data = total_b)
plt.subplot(3,2,5)
sns.barplot(x=tb_weekday_mean.index,y=tb_weekday_mean['total_purchase_amt'])
plt.subplot(3,2,6)
sns.barplot(x=tb_weekday_mean.index,y=tb_weekday_mean['total_redeem_amt'])
<matplotlib.axes._subplots.AxesSubplot at 0x1d4c1e86978>

在这里插入图片描述

user_b 月份

total_b['month'] = total_b['date'].apply(lambda x:str(x)[:7])
tb_month_mean = total_b[['weekday','total_purchase_amt','total_redeem_amt']].groupby(by='weekday').mean()
plt.figure(figsize=(20,20))
plt.subplot(4,1,1)
sns.violinplot(x='month', y='total_purchase_amt', data = total_b)
plt.subplot(4,1,2)
sns.violinplot(x='month', y='total_redeem_amt', data = total_b)
plt.subplot(4,1,3)
sns.barplot(x='month', y='total_purchase_amt', data = total_b)
plt.subplot(4,1,4)
sns.barplot(x='month', y='total_redeem_amt', data = total_b)
<matplotlib.axes._subplots.AxesSubplot at 0x1d48bbaf198>

在这里插入图片描述

日期

total_b['week'] = total_b['date'].dt.week
total_b['year'] = total_b['date'].dt.year
total_b['week'] = total_b['year'].astype('str') + '_' + total_b['week'].astype('str')
day_table = total_b[['week','weekday','total_purchase_amt','total_redeem_amt',]]
tpa_day_c = pd.crosstab(day_table['week'],day_table['weekday'],values=day_table['total_purchase_amt'],aggfunc=np.mean)
tpa_day_c.loc['2014_1',:1] = tpa_day_c.loc['2013_1',:1]
tpa_day_c.drop(index='2013_1',inplace=True)
tpa_day_c = tpa_day_c.reset_index().join(tpa_day_c.reset_index()['week'].str.split('_',expand= True).rename(columns = {0:'w',1:'y'}))
tpa_day_c[['w','y']] = tpa_day_c[['w','y']].astype('int')
tpa_day_c.sort_values(by=['w','y'],inplace=True)
plt.figure(figsize=(16,9))
sns.heatmap(tpa_day_c.iloc[:,1:8],cmap='rainbow')
<matplotlib.axes._subplots.AxesSubplot at 0x1d48e1c0128>

在这里插入图片描述

节假日

total_b[total_b['date'] in pd.date_range('2014-5-1','2014-5-3')]
fest_51 = total_b[(total_b['date'] >= pd.to_datetime('2014-4-20'))&(total_b['date'] <= pd.to_datetime('2014-5-10'))]
fest_10 = total_b[(total_b['date'] >= pd.to_datetime('2013-9-20'))&(total_b['date'] <= pd.to_datetime('2013-10-20'))]

plt.figure(figsize=(20,10))
plt.subplot(2,1,1)
plt.axvspan(pd.to_datetime('2014-4-29'),pd.to_datetime('2014-5-1'), facecolor='m',alpha=0.1) 
for i in ['total_purchase_amt','total_redeem_amt']:
    plt.plot(fest_51['date'],fest_51[i].values,label=f'{i}')
    plt.legend()
plt.subplot(2,1,2)
plt.axvspan(pd.to_datetime('2013-10-1'),pd.to_datetime('2013-10-7'), facecolor='m',alpha=0.1) 
for i in ['total_purchase_amt','total_redeem_amt']:
    plt.plot(fest_10['date'],fest_10[i].values,label=f'{i}')
    plt.legend()

在这里插入图片描述

结合用户和交易

total_b
datereport_datetBalanceyBalancetotal_purchase_amtdirect_purchase_amtpurchase_bal_amtpurchase_bank_amttotal_redeem_amtconsume_amt...tftocard_amtshare_amtcategory1category2category3category4weekdaymonthweekyear
02013-07-018877639141252327861225364535324883483245878621250722112080645525022382160...0295620.089350.00.0292810.002013-072013_272013
12013-07-029662736960278810703252327861290373902900751016561183124463272554548350066...0298800.01650.00.0348416.012013-072013_272013
22013-07-031004522079730012760627881070327270770272341562049366067404965953867454406...0366140.069764.00.0384642.022013-072013_272013
32013-07-041052835819231203806230012760618321185182822301210817861740526410729535391...03895525078.0107500.00.0402813.032013-072013_272013
42013-07-05109511035203209232243120380621164874911607991693043146775602763587697926...0407580.051100.00.0646826.042013-072013_272013
52013-07-061113228041835605786132092322436751272367091903153348351757071616635300533...0420820.021800.00.0278733.052013-072013_272013
62013-07-071139398016236103735835605786189622328920335525776836625673982735336880...0418970.00.00.0336880.062013-072013_272013
72013-07-081167581064040994789536103735857258266572184094749093897274718347729852530...039857219851.03500.00.0629179.002013-072013_282013
82013-07-091236025532643327377740994789526798941267559432154116452147793473059765509...04299860000.00.00.0705509.012013-072013_282013
92013-07-1012460909490461373114433273777306965063064469120475437101692542597169603737...0518158000.0267500.00.0328237.022013-072013_282013
102013-07-111324600783850193951146137311444075197440210313870139253196393508800981695...054166325455.02194.00.0654046.032013-072013_282013
112013-07-1213628492024527630842501939511341839043412684528358933576791284925731371072...057059483604.0173268.00.0714200.042013-072013_282013
122013-07-13139707148225393127305276308421516471715103661846116866424933482829309654...0610560.045584.00.0264070.052013-072013_282013
132013-07-141403110765855914392653931273022615303225543541844750741068472784107592038...0609490.0182560.00.0409478.062013-072013_282013
142013-07-151437333051059416453855914392648128555480696862840122419668462131079431521794...058869679000.0222000.00.0620794.002013-072013_292013
152013-07-161487659912463292240459416453850622847505596183965779910901819118649811529687...063229744302.060000.00.0725385.012013-072013_292013
162013-07-17153999985056510265736329224042901568228946382251672713779111109115132109498...0693001348120.050000.00.0711378.022013-072013_292013
172013-07-181554091429666349572265102657324234505241605611263321211527349117653563195410...073944577438.02081710.00.0536262.032013-072013_292013
182013-07-1915863006572687931077663495722336801243360319025296006830718492447691204252...076934713031.0140760.00.0350461.042013-072013_292013
192013-07-2016305883200703769013687931077204390792036098114965131539585046011431780862...078098229770.0376900.00.01174192.052013-072013_292013
202013-07-211644679905772223007670376901321142394210646481415515269094962681331761299...077746100000.026681.00.0634618.062013-072013_292013
212013-07-2216909806480743534705722230076404488964037404933166661720738819144267605231...07484720220.064584.00.0520427.002013-072013_302013
222013-07-23174935982877772668017435347055813614758056583481050879951496244040512173231...079564483047.02000.00.01688184.012013-072013_302013
232013-07-2417815690740789430727777266801484225184833756138512434982512736258592712047...084957320000.012669.00.0379378.022013-072013_302013
242013-07-251823843685080865130978943072757433418573472094206476115282448382128361557611...086209526800.0183410.00.0847401.032013-072013_302013
252013-07-261862092155081418075780865130944721817446332793329963811333641391923691035450...088538469737.048517.00.0517196.042013-072013_302013
262013-07-27184800073868163163158141807571719445117102400104825776619823150588931922114...0920511005922.058834.00.0857358.052013-072013_302013
272013-07-2818842361408844888486816316315362553823616442132964064320035776832111161872...090961551807.027184.00.0582881.062013-072013_302013
282013-07-291916445400887980119884488848653512076534233604182004111603319185993641202332...088716559277.0121890.00.0521165.002013-072013_312013
292013-07-301952680810091423385987980119847481243473878173377976213608055130485821318647...093426510220.088782.00.0719645.012013-072013_312013
..................................................................
3972014-08-0224376412660620086601920200697600151890921301868893497847870410841064517225022541930977...95960150220278120388807.07149562.00.014392608.052014-082014_312014
3982014-08-0324442878520820133302100200866019201738253971716348656212057510951429012712521740828571...62502420219053217238818.09473820.00.014115933.062014-082014_312014
3992014-08-04244851754228201410354602013330210033064088432840447212090464620749982632290752478477516...200460527223641231529783.012074693.016500000.018373040.002014-082014_322014
4002014-08-05245194160070203141097912014103546039478087039258971516470555622788415922170653974040082...111691666219115526591098.014768284.016410000.016270700.012014-082014_322014
4012014-08-06245496284334203205842132031410979128882101628659829812764162415895667428234659456127546...199403632222271821291913.019643544.01700000.013492089.022014-082014_322014
4022014-08-0724579840862820314571173203205842132476464742454056586183750818356815025365951460050774...151933651224081615410546.028941346.04300000.011398882.032014-082014_322014
4032014-08-0824561715356020236826133203145711732339037172316169355403541617758151931164875767603275...200974417228678219163226.018669835.014800000.014970214.042014-082014_322014
4042014-08-0924622139002520233477189202368261331602627641580288874466309211336579516361170850648384...82935238223387718520392.022698987.00.09429005.052014-082014_322014
4052014-08-1024626168387020303102834202334771892595348702573132087642749818088571018990922555462188...109847134222166223077228.020717590.01000.011666370.062014-082014_322014
4062014-08-11247047187726202160499692030310283433155047132933137010202122822731014241860333660850688...316799953221910117201769.023642952.02600000.017405967.002014-082014_332014
4072014-08-12247872973284201647887842021604996925849367325631187910159276815471911130975485848112423...215083227218179415814510.014362076.01811000.016124837.012014-082014_332014
4082014-08-1324801397128220122319886201647887842615066192592911039908220716020889630397551755277175...225763451221551625161278.014424643.03000000.012691254.022014-082014_332014
4092014-08-14247752153014201680831152012231988625770266025544613011330389114214223921193943138957777...127579344225653016476951.010331366.00.012149460.032014-082014_332014
4102014-08-1524805427754020176118728201680831152445516202423360528333017815900587423651600747779700...150651042221556810680763.018761561.00.018337376.042014-082014_332014
4112014-08-1624865851433620171964125201761187282150597362128420475570587315713617421921433941164497...139686821221768917021673.012925634.05000.011212190.052014-082014_332014
4122014-08-1724914190629020182378312201719641251499782711477774034244247810533492513956408440085759...7133055622008687738809.019041259.00.013305691.062014-082014_332014
4132014-08-1824976628401820221708442201823783122984991462963086808455746521175121525916901655584679...150986027219046614850247.015566826.00.025167606.002014-082014_342014
4142014-08-19249967704609202331805382022170844226640197326419645810864568315555077525492987774648794...147252223220551529600198.011467928.02040000.031540668.012014-082014_342014
4152014-08-20250471237520203391064482023318053830837869230613360613442098317171262320245278273775504...100226182224508624165679.017235710.01650000.030724115.022014-082014_342014
4162014-08-2125047124995620370906609203391064482517635172495257128581818816370752421996335670493107...131262433223780528669192.010979534.0420000.030424381.032014-082014_342014
4172014-08-2225067267061220437873459203709066092463160562440941649726669514682746917934920674322748...91406173222189231216803.010023388.0210000.032872557.042014-082014_342014
4182014-08-232509345137572037990795520437873459141412027139192764427972549639551019937753163911142...107065000221926322294743.013156888.0100000.028359511.052014-082014_342014
4192014-08-242511762161042031902328820379907955130195484127991522431436868484783619108015152046987...123525938220396219194293.012132444.0900000.019820250.062014-082014_342014
4202014-08-2525192143910020316184100203190232883095742233073611419228202521507911631241341196656751...188597044221308235518047.011637924.01500000.048000780.002014-082014_352014
4212014-08-26252102719042203376506262031618410030694508930474749712446052718028697028547856388192408...182471401219759224879168.016497042.00.046816198.012014-082014_352014
4222014-08-272523041398292017168128020337650626302194801299973007102372488197600519468164147107408171...339789039222179434053795.019426628.06000000.047927748.022014-082014_352014
4232014-08-2825290837719620118870170201716812802450827512428876475673459618615305129789386198896299...183265388219510425606866.025743565.04000000.043545868.032014-082014_352014
4242014-08-29253693882084201126685032011887017026755471326533517287236719178098453273756380103967802...141809256221954131191914.030028785.040000.042707103.042014-082014_352014
4252014-08-3025383488049020116003141201126685031997087721974836638479832911268533419637413470813897...93107052222510918388231.014302883.0100000.038022783.052014-082014_352014
4262014-08-312540564422342009815032120116003141275090213272878511148125324124753187292943033106464912...161974171221170223745525.031675561.00.051043826.062014-082014_352014

427 rows × 22 columns

plt.figure(figsize=(20,15))
plt.subplot(2,1,1)
plt.plot(user_b.groupby(by='date').count()['report_date'].index,
        user_b.groupby(by='date').count()['report_date'].values)
plt.title('人数')
plt.subplot(2,1,2)
plt.plot(user_b.groupby(by='date').mean()['total_purchase_amt'].index,
        user_b.groupby(by='date').mean()['total_purchase_amt'].values)
plt.plot(user_b.groupby(by='date').mean()['total_redeem_amt'].index,
        user_b.groupby(by='date').mean()['total_redeem_amt'].values)
plt.title('平均交易量')
Text(0.5, 1.0, '平均交易量')

在这里插入图片描述

结合收益率

total_b['num'] = user_b.set_index('date').groupby(by='date').count()['report_date'].values

interest.set_index(pd.to_datetime(interest['mfd_date'],format='%Y%m%d').dt.date,inplace=True)

total_b = total_b.join(interest)

shibor.set_index(pd.to_datetime(shibor['mfd_date'],format='%Y%m%d').dt.date,inplace=True)
total_b =  total_b.join(shibor.drop(columns='mfd_date'))

total_b.shape
(427, 29)
tb_in = total_b[['total_purchase_amt','direct_purchase_amt','mfd_daily_yield','mfd_7daily_yield']]
tb_in['mfd_daily_yield_-1'] = tb_in['mfd_daily_yield'].shift(-1)
tb_in.corr()
total_purchase_amtdirect_purchase_amtmfd_daily_yieldmfd_7daily_yieldmfd_daily_yield_-1
total_purchase_amt1.0000000.9999860.4351400.4268140.422715
direct_purchase_amt0.9999861.0000000.4362000.4277330.423756
mfd_daily_yield0.4351400.4362001.0000000.9765060.972618
mfd_7daily_yield0.4268140.4277330.9765061.0000000.965990
mfd_daily_yield_-10.4227150.4237560.9726180.9659901.000000

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值