最近又接到一个预测项目需求:预测投资用户每天在不同产品上的投资金额,属于每日即时预测。需要拿最近一年的数据做测试集,预测每天不同期限的产品分别被投资多少金额;然后每天根据这些预测金额到借款端匹配借款需求,借款端运营再根据资金端的需求动态调整营销活动,按资金需求的多少有效运营借款需求,从而与资金端形成良性互动,节约闲置资金成本。
就这样一个需求,如何实现?我首先想到用业务经验的方法来实现:从众多产品中抽丝剥茧,找到产品、推广、促销、节日之间的关系,最终确定某一期限产品当天的成交预测值。但是从成交数据来看,这些数据每天都在变化,看不出两者之间有明显的相关关系,用这些现有的数据关系能预测将来的成交吗?
既然是预测问题,那我首先想到的是时序预测,那么问题就来了,如何程序化这些现有的数据关系?
在网上找了一些相关模型,但没有哪个模型能刻画我所说的推广、促销、节日、加息、降息等业务时点与预测数据之间的关系。仔细查找之后,首先(注意还有后续,我会在下篇博文做出解释)发现了一个名叫 prophet 的时序预测包,它可以用不同类型的 holiday 值对模型进行业务时点的匹配和修正,非常适合目前的项目需求。不管三七二十一,先安装了再说。
安装之后需要初始化数据、初始化节假日、初始化模型、调整参数、衡量结果,具体代码如下:
library(prophet)
library(dplyr)
library(dplyr)
# Initialise data ----
# All input series live in one directory and are parsed with identical
# options, so the shared settings are kept in a single helper.
ts_dir <- 'd:/Rdata/zjd/ts'

# Read one CSV from `ts_dir`: the first row is the header and the literal
# string "NA" is treated as missing.
read_ts <- function(file_name) {
  # `na.strings` is spelled out in full rather than relying on partial
  # argument matching; `TRUE` is used because `T` is an ordinary,
  # reassignable variable.
  read.csv(file.path(ts_dir, file_name), na.strings = 'NA', header = TRUE)
}

all <- read_ts('all.csv')
alln <- read_ts('alln.csv')
qb30 <- read_ts('qb30.csv')
qb45 <- read_ts('qb45.csv')
qb60 <- read_ts('qb60.csv')
qb90 <- read_ts('qb90.csv')
qb180 <- read_ts('qb180.csv')
qb270 <- read_ts('qb270.csv')
qb365 <- read_ts('qb365.csv')
qb730 <- read_ts('qb730.csv')
qb1095 <- read_ts('qb1095.csv')
qb1095m <- read_ts('qb1095+.csv')
qbother <- read_ts('qbother.csv')
# The holiday table is currently not loaded:
# holiday <- read_ts('holidays.csv')

# Build the `ds` (date) / `y` (value) frames ----
# NOTE(review): presumably every CSV holds exactly one row per calendar day of
# the stated range; data.frame() stops (or recycles when the lengths divide
# evenly) otherwise -- confirm against the files.

# NOTE(review): this 2017-only frame built from `alln` is overwritten by the
# next assignment to the same name -- confirm which of the two is intended.
historyalln <- data.frame(
  ds = seq(as.Date('2017-01-01'), as.Date('2017-09-17'), by = 'day'),
  y = alln$yn
)

# Daily date index shared by all remaining frames.
ds_full <- seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'day')

historyalln <- data.frame(ds = ds_full, y = all$yn)
historyallo <- data.frame(ds = ds_full, y = all$yo)
history30n <- data.frame(ds = ds_full, y = qb30$yn)
# Quick visual check of the qb30 series.
plot(history30n$ds, history30n$y)
history45n <- data.frame(ds = ds_full, y = qb45$yn)
history60n <- data.frame(ds = ds_full, y = qb60$yn)
history90n <- data.frame(ds
# NOTE(review): the statements below repeat the loads and frames performed
# earlier in this file and look like a copy/paste artifact -- confirm whether
# this second pass is needed at all.
# `all` is not read in this section; it must already be in the workspace.

# Directory holding every input CSV.
ts_dir <- 'd:/Rdata/zjd/ts'

# Read one CSV from `ts_dir`: the first row is the header and the literal
# string "NA" is treated as missing.
read_ts <- function(file_name) {
  # `na.strings` is spelled out in full rather than relying on partial
  # argument matching; `TRUE` is used because `T` is an ordinary,
  # reassignable variable.
  read.csv(file.path(ts_dir, file_name), na.strings = 'NA', header = TRUE)
}

alln <- read_ts('alln.csv')
qb30 <- read_ts('qb30.csv')
qb45 <- read_ts('qb45.csv')
qb60 <- read_ts('qb60.csv')
qb90 <- read_ts('qb90.csv')
qb180 <- read_ts('qb180.csv')
qb270 <- read_ts('qb270.csv')
qb365 <- read_ts('qb365.csv')
qb730 <- read_ts('qb730.csv')
qb1095 <- read_ts('qb1095.csv')
qb1095m <- read_ts('qb1095+.csv')
qbother <- read_ts('qbother.csv')
# The holiday table is currently not loaded:
# holiday <- read_ts('holidays.csv')

# Build the `ds` (date) / `y` (value) frames ----
# NOTE(review): presumably every CSV holds exactly one row per calendar day of
# the stated range; data.frame() stops (or recycles when the lengths divide
# evenly) otherwise -- confirm against the files.

# NOTE(review): this 2017-only frame built from `alln` is overwritten by the
# next assignment to the same name -- confirm which of the two is intended.
historyalln <- data.frame(
  ds = seq(as.Date('2017-01-01'), as.Date('2017-09-17'), by = 'day'),
  y = alln$yn
)

# Daily date index shared by all remaining frames.
ds_full <- seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'day')

historyalln <- data.frame(ds = ds_full, y = all$yn)
historyallo <- data.frame(ds = ds_full, y = all$yo)
history30n <- data.frame(ds = ds_full, y = qb30$yn)
# Quick visual check of the qb30 series.
plot(history30n$ds, history30n$y)
history45n <- data.frame(ds = ds_full, y = qb45$yn)
history60n <- data.frame(ds = ds_full, y = qb60$yn)
history90n <- data.frame(ds