# from numpy.random import seed
# seed(1)
# import tensorflow
# tensorflow.random.set_seed(2)
import numpy as np
import pandas as pd
# Load the raw data set. The path is left blank on purpose: point it at the
# source Excel workbook before running.
# `index` is an option of `to_excel`, not of `read_excel`; current pandas
# rejects unknown keywords, so it is not passed here.
data = pd.read_excel("")
# Every row except the last one, columns at positions 1-13.
data1 = data.iloc[:-1, 1:14]
# Notebook-style display; has no effect when run as a plain script.
data1
# Descriptive statistics: one row per column of data1 holding its minimum,
# maximum, mean and standard deviation.
stat_labels = ['MIN', 'MAX', 'MEAN', 'STD']
r = pd.DataFrame(
    [data1.min(), data1.max(), data1.mean(), data1.std()],
    index=stat_labels,
).T
# Notebook-style display of the table, rounded to two decimals.
r.round(2)
# Correlation analysis.
# Every row except the last one, all columns from position 1 onward.
data2 = pd.DataFrame(data.iloc[:-1, 1:])
# Pairwise Pearson correlation matrix, rounded to two decimals for display.
data2.corr(method='pearson').round(2)
# x11 has a very low correlation, so dropping it could be considered.
# Min-max normalisation.
import math
# `np.float` was only an alias of the builtin `float` and was removed in
# NumPy 1.24; the builtin requests the same float64 dtype on every version.
data3 = pd.DataFrame(data2, dtype=float)
# Rescale each column by its own minimum and range.
dataNM = (data3 - data3.min()) / (data3.max() - data3.min())
# Line chart of the normalised series.
from matplotlib import pyplot
import matplotlib.pyplot as plt
# The figure size must be passed to the plot call itself: calling
# plt.figure(figsize=...) after plotting only opens a second, empty figure
# and leaves the chart at its default size.
ax = dataNM.plot(figsize=(10, 10))
# Legend anchored outside the lower-right corner of the axes, in 7 columns.
ax.legend(loc='lower right', bbox_to_anchor=(1.2, -0.3), ncol=7)
# Variable selection with Lasso regression.
from sklearn.linear_model import Lasso
# The first 13 columns are the predictors, 'y' is the target.
predictors = data2.iloc[:, 0:13]
model = Lasso()
model.fit(predictors, data2['y'])
# Fitted coefficient of every predictor, laid out as one labelled row.
q = pd.DataFrame(
    model.coef_,
    index=['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
           'x8', 'x9', 'x10', 'x11', 'x12', 'x13'],
).T
# Notebook-style display, rounded to four decimals.
q.round(4)
# The Lasso coefficient of x12 is 0, so x12 is the variable to drop.
# GM(1,1) grey model used for forecasting.
def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to a one-dimensional series.

    Parameters
    ----------
    x0 : array-like of float
        The original series in chronological order. At least two
        observations are required.

    Returns
    -------
    f : callable
        ``f(k)`` returns the model value for the 1-based period ``k``
        (``k = 1`` is the first observation, ``k = len(x0) + 1`` the first
        out-of-sample period). ``k`` may be a scalar or an array.
    a : float
        Development coefficient of the fitted model.
    b : float
        Grey input of the fitted model.
    first : float
        The first observation, ``x0[0]``.
    C : float
        Posterior variance ratio: standard deviation of the absolute
        residuals divided by the standard deviation of ``x0``.
    P : float
        Small-residual probability: share of residuals whose distance from
        the mean residual is below ``0.6745 * std(x0)``.

    Raises
    ------
    ValueError
        If ``x0`` contains fewer than two observations.
    """
    x0 = np.asarray(x0, dtype=float)
    if x0.size < 2:
        raise ValueError("GM11 needs at least two observations")

    # 1-AGO (cumulative sum) series and its adjacent-mean series.
    x1 = x0.cumsum()
    z1 = (x1[1:] + x1[:-1]) / 2.0

    # Least-squares estimate of x0[k] = -a * z1[k] + b.
    B = np.column_stack((-z1, np.ones_like(z1)))
    a, b = np.linalg.lstsq(B, x0[1:], rcond=None)[0]

    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored model value for the 1-based period ``k``."""
        k = np.asarray(k, dtype=float)
        restored = scale * (np.exp(-a * (k - 1)) - np.exp(-a * (k - 2)))
        # The model reproduces the first observation exactly; the difference
        # formula is only meaningful from the second period on.
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # real arrays untouched.
        return np.where(k == 1, first, restored)[()]

    fitted = np.array([f(k) for k in range(1, len(x0) + 1)])
    delta = np.abs(x0 - fitted)  # absolute residuals
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, first, C, P
# Forecast every selected column two years ahead with GM(1,1).
# x12 is not part of the selection.
l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x13', 'y']
data4 = data2[l].copy()
data4.index = range(1996, 2017)
# Append two empty rows that will receive the forecasts.
data4.loc[2017] = np.nan
data4.loc[2018] = np.nan
for col in l:
    # Fit on the observed years only; the two appended rows are excluded by
    # position.
    f = GM11(data4[col].iloc[:-2].to_numpy(dtype=float))[0]
    # f is 1-based on the observed series, so with the two extra rows in
    # place len(data4) - 1 and len(data4) are the periods of 2017 and 2018.
    # .loc writes into data4 itself; the chained form data4[col][2017] = ...
    # assigns through an intermediate Series and is not guaranteed to reach
    # the frame.
    data4.loc[2017, col] = f(len(data4) - 1)
    data4.loc[2018, col] = f(len(data4))
    data4[col] = data4[col].round(2)
print(data4)
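# The GM(1,1) forecasts of the x variables are acceptable, but the forecast of y deviates too much, so a neural-network model is used to predict y instead.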
# Neural-network model for y: prepare the training data.
feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x13']  # feature columns
data_train = data.iloc[:-1, 1:]
# Independent copy, so columns added to data_pre never write through to (or
# warn about) a slice of data4.
data_pre = data4.iloc[:, :14].copy()
data_mean = data_train.mean()
data_std = data_train.std()
# z-score standardisation of the training frame.
data_train = (data_train - data_mean) / data_std
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its
# replacement.
x_train = data_train[feature].to_numpy()  # feature matrix
y_train = data_train['y'].to_numpy()  # target vector
# Regression network: 12 inputs -> 12 ReLU units -> 1 linear output.
from keras.models import Sequential
from keras.layers import Dense, Activation
model = Sequential()
# `output_dim` is the Keras 1 spelling of the layer width; later releases
# only accept `units` (first positional argument).
model.add(Dense(12, input_dim=12))
model.add(Activation('relu'))
# The input size of the output layer is inferred from the previous layer.
model.add(Dense(1))
# Mean squared error as the objective, optimised with Adam.
model.compile(loss='mean_squared_error', optimizer='adam')
# `nb_epoch` was likewise renamed to `epochs`.
model.fit(x_train, y_train, epochs=10000, batch_size=16)
# Predict y and map the network output back to the original scale.
# The inputs are standardised with the training mean/std;
# DataFrame.as_matrix() was removed in pandas 1.0, to_numpy() replaces it.
x = ((data_pre[feature] - data_mean[feature]) / data_std[feature]).to_numpy()
# predict() presumably returns one column per output unit, i.e. (n, 1) here;
# ravel() makes it a plain 1-D column before undoing the standardisation.
data_pre[u'y_pred'] = model.predict(x).ravel() * data_std['y'] + data_mean['y']
# Training takes a long time.
# Plot the actual y against the predicted y.
import matplotlib.pyplot as plt
y_columns = ['y', 'y_pred']
# One subplot per series: blue line with circles for y, red line with stars
# for y_pred.
p = data_pre[y_columns].plot(subplots=True, style=['b-o', 'r-*'])
plt.show()
# Both series again, this time on shared axes.
data_pre[y_columns].plot()
# Notebook-style display of the full result table.
data_pre
# Export the results to an Excel workbook in the working directory.
data_pre.to_excel('预测.xlsx')