import numpy as np
import pandas as pd
from sklearn import linear_model
import warnings
warnings.filterwarnings("ignore") # 过滤futureWarning
import seaborn as sn
import matplotlib.pyplot as plt
def grey_models11(x0):
    """Fit a GM(1,1) grey forecasting model to the 1-D numpy series x0.

    Returns a 6-tuple (f, a, b, x0[0], C, P):
      f      -- restored-value predictor, f(k) = x̂1(k) - x̂1(k-1), k starts at 1
      a, b   -- development coefficient and grey input from the normal equations
      x0[0]  -- first observation of the series
      C      -- posterior variance ratio (residual std / data std)
      P      -- small-residual probability
    """
    x1 = x0.cumsum()                                  # 1-AGO (accumulated) series
    z1 = (x1[:-1] + x1[1:]) / 2.0                     # adjacent-mean (MEAN) sequence
    z1 = z1.reshape((len(z1), 1))                     # column vector, (n-1) x 1
    B = np.hstack((-z1, np.ones_like(z1)))            # data matrix, (n-1) x 2
    Yn = x0[1:].reshape((len(x0) - 1, 1))             # response column, (n-1) x 1
    # Least-squares estimate of [a, b] via the normal equations (B'B)^-1 B' Yn
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    coef = x0[0] - b / a
    # Restored values of the original series from the fitted AGO solution
    f = lambda k: coef * np.exp(-a * (k - 1)) - coef * np.exp(-a * (k - 2))
    # Absolute residuals between observations and restored values
    delta = np.abs(x0 - np.array([f(i) for i in range(1, len(x0) + 1)]))
    C = delta.std() / x0.std()                        # posterior variance ratio
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P
# --- 1. Correlation analysis of the raw features ---------------------------
file = 'data.csv'
data = pd.read_csv(file)
# Pearson correlation matrix, rounded to 2 decimal places
c = data.corr(method='pearson').round(2)
# Visualise the matrix as a heat map (colour limits left to the data)
sn.heatmap(c, cmap='Wistia')
plt.show()
print("相关系数矩阵:\n", c)
# --- 2. Feature screening with LassoLars (large alpha zeroes out weak features)
# sklearn deprecated `normalize` in 1.2 and removed it in 1.4, where passing it
# raises TypeError.  The removed-version behaviour equals normalize=False, so
# falling back to the bare constructor keeps the result identical on new sklearn.
try:
    reg = linear_model.LassoLars(alpha=1000, normalize=False)
except TypeError:
    reg = linear_model.LassoLars(alpha=1000)
reg.fit(data.iloc[:, 0:13], data['y'])
# Coefficient table: non-zero entries mark the retained features
print("分析系数表:\n", reg.coef_)
# --- 3. Grey-forecast each selected feature (and y) out to 2014/2015 -------
l = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13', 'y']  # selected columns
data_1 = data[l].copy()              # observed values plus the two forecast rows
data_1.index = range(1994, 2014)     # label the 20 observed rows by year
data_1.loc[2014] = None              # placeholder rows to receive the forecasts
data_1.loc[2015] = None
output_file = 'data1_GM.xls'         # path where the grey-forecast table is saved
print("后验差检验模型精度:")
for i in l:
    # Fit GM(1,1) on the 20 observed years of this column; the returned f(k)
    # depends only on the position k within the series.
    res = grey_models11(data_1[i].iloc[:-2].values)
    f = res[0]
    # Posterior-variance accuracy test.  NOTE: the original comments had the
    # labels swapped -- res[-1] is the small-residual probability P and
    # res[-2] is the variance ratio C (see grey_models11's return order).
    P = res[-1]  # small-residual probability
    C = res[-2]  # posterior variance ratio
    print(i, P, C)
    # Positions 21 and 22 of the series correspond to 2014 and 2015.  Use
    # .loc instead of chained indexing (data_1[i][2014] = ...), which is
    # silently lost under pandas copy-on-write.
    data_1.loc[2014, i] = f(len(data_1) - 1)
    data_1.loc[2015, i] = f(len(data_1))
    data_1[i] = data_1[i].round(2)
data_1.to_excel(output_file, "a+")  # second positional arg is the sheet name
from sklearn.svm import LinearSVR
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.linear_model import LinearRegression # 线性回归
from sklearn.neighbors import KNeighborsRegressor # K近邻回归
from sklearn.neural_network import MLPRegressor # 神经网络回归
from sklearn.tree import DecisionTreeRegressor # 决策树回归
from sklearn.tree import ExtraTreeRegressor # 极端随机森林回归
from xgboost import XGBRegressor # XGBoot
from sklearn.ensemble import RandomForestRegressor # 随机森林回归
from sklearn.ensemble import AdaBoostRegressor # Adaboost 集成学习
from sklearn.ensemble import GradientBoostingRegressor # 集成学习梯度提升决策树
from sklearn.ensemble import BaggingRegressor # bagging回归
from sklearn.linear_model import ElasticNet
import pandas as pd
from sklearn.metrics import explained_variance_score, \
mean_absolute_error, mean_squared_error, \
median_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore") # 过滤futureWarning
# Candidate regressors paired with their display names, kept in one table so
# the two parallel lists below can never drift out of sync.
_model_specs = [
    ('LinearRegression', LinearRegression()),
    ('KNNRegressor', KNeighborsRegressor()),
    ('MLPRegressor', MLPRegressor(alpha=20)),
    ('DecisionTree', DecisionTreeRegressor()),
    ('ExtraTree', ExtraTreeRegressor()),
    ('XGBoost', XGBRegressor()),
    ('RandomForest', RandomForestRegressor()),
    ('AdaBoost', AdaBoostRegressor()),
    ('GradientBoost', GradientBoostingRegressor()),
    ('Bagging', BaggingRegressor()),
    ('ElasticNet', ElasticNet()),
]
models_str = [name for name, _ in _model_specs]
models = [estimator for _, estimator in _model_specs]
# --- Support-vector regression on the grey-forecast table ------------------
inputfile = 'data1_GM.xls'  # table written by the grey-prediction step
data = pd.read_excel(inputfile,
                     names=['year', 'x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13', 'y'])
feature = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']  # predictor columns

data_train = data.iloc[:20].copy()   # fit on the 20 rows before 2014
data_mean = data_train.mean()        # per-column statistics, reused to
data_std = data_train.std()          # standardise and to de-standardise later
data_train = data_train.sub(data_mean).div(data_std)  # z-score standardisation

x_train = data_train[feature].values  # standardised predictors
y_train = data_train['y'].values      # standardised target

# Linear support-vector regressor (epsilon-insensitive loss)
linearsvr = LinearSVR(epsilon=0, tol=0.0001, C=1.0,
                      loss='epsilon_insensitive', max_iter=1000)
linearsvr.fit(x_train, y_train)

# Standardise ALL rows with the training statistics, predict, then map the
# predictions back to the original scale of y.
x = data[feature].sub(data_mean[feature]).div(data_std[feature]).values
data['y_pred'] = data_mean['y'] + data_std['y'] * linearsvr.predict(x)

outputfile = 'result.xls'  # where the SVR predictions are saved
data.to_excel(outputfile)
print('真实值与预测值分别为:\n', data[['y', 'y_pred']])

# Actual vs predicted, indexed by year (major ticks every 2 years)
data.plot(x='year', y=['y', 'y_pred'], style=['b-o', 'r-*'])
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(2))
plt.show()
# Same comparison on the default integer index
p = data[['y', 'y_pred']].plot(style=['b-o', 'r-*'])
plt.show()
# --- Train and score every candidate regressor -----------------------------
for name, model in zip(models_str, models):
    print('开始训练模型:' + name)
    a = 'y_pred_' + name  # column holding this model's de-standardised predictions
    # Fit on the standardised training rows, predict all rows, map back to
    # the original scale of y (same statistics as the SVR step above).
    data[a] = model.fit(x_train, y_train).predict(x) * data_std['y'] + data_mean['y']
    # Score on the historical rows only -- the last two rows are the 2014/2015
    # forecasts and have no observed y.  (df must be rebuilt each iteration
    # because the prediction column `a` was just added to `data`.)
    df = data[:-2]
    print('平均绝对误差为:', mean_absolute_error(df['y'], df[a]))
    print('均方误差为:', mean_squared_error(df['y'], df[a]))
    print('中值绝对误差为:', median_absolute_error(df['y'], df[a]))
    print('可解释方差值为:', explained_variance_score(df['y'], df[a]))
    print('R方值为:', r2_score(df['y'], df[a]))
    print('*-*' * 15)