本文总结了7种统计回归方法,11种机器学习方法,4种集成学习方法以及4种深度学习方法共26种方法的python代码。(其实就是调包,本文将这些总结了一下)。
一、统计回归
1.线性回归(LR)模型
from sklearn.linear_model import LinearRegression
# Ordinary least-squares baseline: fit on the training split, then score the
# test-split predictions with MAE and RMSE.
# x_train / y_train / x_test / y_test, numpy (np) and the sklearn.metrics
# helpers are expected to be defined before this snippet runs.
LR = LinearRegression()
LR_pre = LR.fit(x_train, y_train).predict(x_test)
lr_mae = mean_absolute_error(y_test, LR_pre)
lr_rmse = np.sqrt(mean_squared_error(y_test, LR_pre))
print("MAE,RMSE", lr_mae, lr_rmse)
2.岭回归(Ridge)模型
from sklearn.linear_model import Ridge
将LR模型中的LinearRegression替换为Ridge即可
3.Lasso回归模型
from sklearn.linear_model import Lasso
将LR模型中的LinearRegression替换为Lasso即可
4.逻辑回归模型
from sklearn.linear_model import LogisticRegression
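将LR模型中的LinearRegression替换为LogisticRegression即可(注意:LogisticRegression实际上是分类模型,y_train必须是离散的类别标签;如果目标值是连续数值,fit时会报错,此时应改用其他回归模型,评价指标也应换成accuracy等分类指标)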
5.ARIMA回归模型
from statsmodels.tsa.arima.model import ARIMA
# Univariate ARIMA fitted on the training target only; i, j, k are the order
# parameters and can be chosen by grid search.
arima_order = (i, j, k)
arimax_model = ARIMA(y_train, order=arima_order).fit()
# Forecast as many steps ahead as there are test observations.
n_steps = y_test.shape[0]
pred_result = arimax_model.forecast(n_steps)
arima_mae = mean_absolute_error(y_test, pred_result)
arima_rmse = np.sqrt(mean_squared_error(y_test, pred_result))
print("预测结果 mae:", arima_mae, " rMSE:", arima_rmse)
6.SARIMAX回归模型
import statsmodels.api as sm
# SARIMAX with exogenous regressors. Fitting is slow: start from the ARIMA
# order (i, j, k) and tune from there. The 48 in seasonal_order is the seasonal
# period of the author's series (48 observations per cycle).
sarimax_spec = sm.tsa.statespace.SARIMAX(
    endog=y_train,
    exog=x_train,
    order=(i, j, k),
    seasonal_order=(1, 1, 1, 48),
)
arimax_model = sarimax_spec.fit(disp=0)
# One forecast step per test row, driven by the test-period exogenous features.
horizon = x_test.shape[0]
pred_result = arimax_model.forecast(horizon, exog=x_test)
sarimax_mae = mean_absolute_error(y_test, pred_result)
sarimax_rmse = np.sqrt(mean_squared_error(y_test, pred_result))
print("预测结果 mae:", sarimax_mae, " rMSE:", sarimax_rmse)
7.向量自回归模型
from statsmodels.tsa.vector_ar.var_model import VAR
# Vector autoregression over the columns of x_train from index 3 onward.
var_model = VAR(x_train[:, 3:])
var_model_fit = var_model.fit()
# forecast() uses the trailing k_ar rows of its first argument as the initial
# values. Seed it with the tail of the TRAINING series: seeding with x_test
# (as this snippet did before) starts the forecast from the end of the test
# window, i.e. it leaks test data into the prediction.
lag_order = var_model_fit.k_ar
initial_values = x_train[-lag_order:, 3:]
pred_result = var_model_fit.forecast(initial_values, steps=y_test.shape[0])
# Keep only the last modelled variable as the prediction.
# NOTE(review): this assumes the target series is the last column of
# x_train[:, 3:] — confirm against how x_train was built.
pred_result = pred_result[:, -1]
print("预测结果 mae:", mean_absolute_error(y_test, pred_result), " rMSE:", np.sqrt(mean_squared_error(y_test, pred_result)))
二、机器学习回归
1.决策树回归模型(DT)
from sklearn.tree import DecisionTreeRegressor
# Generic fit / predict / score template; the sections that follow reuse it by
# swapping DecisionTreeRegressor for another estimator class.
Model = DecisionTreeRegressor()
Model_pre = Model.fit(x_train, y_train).predict(x_test)
model_mae = mean_absolute_error(y_test, Model_pre)
model_rmse = np.sqrt(mean_squared_error(y_test, Model_pre))
print("MAE,RMSE", model_mae, model_rmse)
2.支持向量机回归(SVM)
from sklearn.svm import SVR
将DT模型中的DecisionTreeRegressor替换为SVR即可
3.随机森林回归
from sklearn.ensemble import RandomForestRegressor
将DT模型中的DecisionTreeRegressor替换为RandomForestRegressor即可
4.极端随机森林回归
# The extremely randomized *forest* is sklearn.ensemble.ExtraTreesRegressor;
# sklearn.tree.ExtraTreeRegressor is only a single randomized tree.
from sklearn.ensemble import ExtraTreesRegressor
将DT模型中的DecisionTreeRegressor替换为ExtraTreesRegressor即可
5.GBDT回归
from sklearn.ensemble import GradientBoostingRegressor
将DT模型中的DecisionTreeRegressor替换为GradientBoostingRegressor即可
6.XGBoost回归
from xgboost import XGBRegressor
将DT模型中的DecisionTreeRegressor替换为XGBRegressor即可,记得先pip install xgboost
7.LightGBM回归
from lightgbm import LGBMRegressor
将DT模型中的DecisionTreeRegressor替换为LGBMRegressor即可,记得先pip install lightgbm
8.Catboost回归
from catboost import CatBoostRegressor
将DT模型中的DecisionTreeRegressor替换为CatBoostRegressor即可,记得先pip install catboost
9.深度森林回归
from deepforest import CascadeForestRegressor
将DT模型中的DecisionTreeRegressor替换为CascadeForestRegressor即可,记得先pip install deepforest
10.K近邻回归
from sklearn.neighbors import KNeighborsRegressor
将DT模型中的DecisionTreeRegressor替换为KNeighborsRegressor即可
11.多层感知机回归(最简单的ANN)
from sklearn.neural_network import MLPRegressor
将DT模型中的DecisionTreeRegressor替换为MLPRegressor即可
三、集成学习回归
1.基于投票的集成
from sklearn.tree import DecisionTreeRegressor
# Averaging ensemble: two independently fitted decision trees whose test
# predictions are combined with equal weights.
Model1 = DecisionTreeRegressor()
Model1.fit(x_train, y_train)
Model_pre1 = Model1.predict(x_test)
Model2 = DecisionTreeRegressor()
Model2.fit(x_train, y_train)
Model_pre2 = Model2.predict(x_test)
# Element-wise weighted average on the prediction arrays (replaces the manual
# index loop; Model_pre is now an ndarray instead of a list).
Model_pre = 0.5 * np.asarray(Model_pre1) + 0.5 * np.asarray(Model_pre2)
print("MAE,RMSE", mean_absolute_error(y_test, Model_pre), np.sqrt(mean_squared_error(y_test, Model_pre)))
2.基于Bagging的集成
# Bagging-style ensemble: every CatBoost member is trained on its own random
# re-ordering of the training table, and the members' test predictions are
# averaged with equal weights.
model1, model2 = CatBoostRegressor(), CatBoostRegressor()
model = [model1, model2]
# Convert the training table once; each member gets its own shuffled copy.
train_matrix = np.array(TRAIN.values.tolist())
FINAL = []
for member in model:
    # np.random.permutation returns a row-shuffled COPY of the matrix.
    # random.shuffle must not be used on a 2-D ndarray: it swaps row *views*,
    # which duplicates some rows and drops others, corrupting the data.
    normal_train = np.random.permutation(train_matrix)
    normal_x = normal_train[:, 3:-1]  # columns 3 .. second-to-last are the features
    normal_y = normal_train[:, -1]    # last column is the target
    member.fit(normal_x, normal_y)
    FINAL.append(member.predict(time_x_test))
finalpred = FINAL[0] * 0.5 + FINAL[1] * 0.5
print("BAI的MAE,RMSE",
      round(mean_absolute_error(time_y_test, finalpred), 2),
      round(np.sqrt(mean_squared_error(time_y_test, finalpred)), 2))
3.基于Boosting的集成
from sklearn.ensemble import AdaBoostRegressor
# AdaBoost over CatBoost base learners, trained on normal_x / normal_y.
# AdaBoost clones and refits the estimator it is given, so an unfitted
# CatBoostRegressor is enough; fitting it beforehand only wastes a full
# training run on a model that is then discarded.
m = CatBoostRegressor()
# `estimator` replaces `base_estimator`, which was deprecated in
# scikit-learn 1.2 and removed in 1.4 (use base_estimator= on older versions).
ada = AdaBoostRegressor(estimator=m, n_estimators=10, learning_rate=0.1)
ada_fit = ada.fit(normal_x, normal_y)
y_pred = ada_fit.predict(time_x_test)
print("BAI的MAE,RMSE",
      round(mean_absolute_error(time_y_test, y_pred), 2),
      round(np.sqrt(mean_squared_error(time_y_test, y_pred)), 2))
4.基于Stacking的集成
# Stacking: two level-0 CatBoost models (one per feature set) produce
# out-of-fold predictions, and a level-1 CatBoost meta-model is trained on them.
# `kf` is a cross-validation splitter defined elsewhere.

# ---- Level 0, "time" feature set ----
val_time = []        # out-of-fold predictions, concatenated in fold order
Stark_X_time = []    # one test-set prediction vector per fold
for train_index, test_index in kf.split(time_x_train):
    # Fold-local names, so the global y_train / y_test are not overwritten.
    fold_x_train, fold_x_val = time_x_train[train_index], time_x_train[test_index]
    fold_y_train = time_y_train[train_index]
    time_model = CatBoostRegressor()
    time_model.fit(fold_x_train, fold_y_train)
    # NOTE(review): extending in fold order only stays aligned with
    # time_y_train if kf yields contiguous, in-order validation folds
    # (e.g. KFold without shuffling) — confirm.
    val_time.extend(time_model.predict(fold_x_val))
    Stark_X_time.append(time_model.predict(time_x_test))

# ---- Level 0, "normal" feature set ----
val_normalZONG = []  # out-of-fold predictions over all rows of normal_x
Stark_X_normal = []
for train_index, test_index in kf.split(normal_x):
    fold_x_train, fold_x_val = normal_x[train_index], normal_x[test_index]
    fold_y_train = normal_y[train_index]
    normal_model = CatBoostRegressor()
    normal_model.fit(fold_x_train, fold_y_train)
    Stark_X_normal.append(normal_model.predict(time_x_test))
    val_normalZONG.extend(normal_model.predict(fold_x_val))

# Sub-sample the "normal" out-of-fold predictions at index map*map*i + a*map + b.
# `map`, `a` and `b` come from outside this snippet (`map` shadows the builtin).
# NOTE(review): looks like this selects grid cell (a, b) of a map x map grid for
# each step i — confirm against the data layout.
val_normal = []
n_oof = len(val_normalZONG)
for i in range(n_oof):
    idx = map * map * i + a * map + b
    if idx >= n_oof:
        # Past the end of the predictions: stop explicitly. This used to rely
        # on a bare `except: pass` swallowing the IndexError (and every other
        # error, e.g. an undefined `a`/`b`).
        break
    val_normal.append(val_normalZONG[idx])

# ---- Level 1: meta-model on the two level-0 prediction columns ----
# Test-set meta-features are the per-fold test predictions averaged over folds.
test_x_time = np.mean(Stark_X_time, axis=0)
test_x_normal = np.mean(Stark_X_normal, axis=0)
meta_model = CatBoostRegressor()
meta_train = [[t_pred, n_pred] for t_pred, n_pred in zip(val_time, val_normal)]
meta_model.fit(meta_train, time_y_train)
meta_test = [[t_pred, n_pred] for t_pred, n_pred in zip(test_x_time, test_x_normal)]
ff = meta_model.predict(meta_test)
print('finallll', mean_absolute_error(time_y_test, ff), np.sqrt(mean_squared_error(time_y_test, ff)))
四、深度学习回归
1.卷积神经网络CNN
from keras.models import Sequential
from keras.layers import Dense,Conv1D,MaxPooling1D,Flatten
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 1-D CNN regressor on the "time" feature set.
train_x = time_x_train
train_y = time_y_train
test_x = time_x_test
test_y = time_y_test
# Min-max normalisation; the test set is scaled with the TRAINING min/max.
max_value = np.max(train_x)
min_value = np.min(train_x)
train_x = (train_x - min_value) / (max_value - min_value)
test_x = (test_x - min_value) / (max_value - min_value)
# Conv1D needs 3-D input (samples, steps, channels): treat every feature as one
# step with a single channel. The feature count is read from the data instead
# of being hard-coded to 23.
n_features = train_x.shape[1]
train_x = train_x.reshape(-1, n_features, 1)
test_x = test_x.reshape(-1, n_features, 1)
print(train_x.shape, test_x.shape)
# Two Conv1D + max-pooling stages, then a single linear output unit.
model = Sequential()
model.add(Conv1D(32, 3, input_shape=(n_features, 1)))
model.add(MaxPooling1D())
model.add(Conv1D(64, 3))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
model.fit(train_x, train_y, epochs=50, batch_size=1)
# Flatten the predictions to 1-D before scoring.
predict_y = model.predict(test_x)
predict_y = predict_y.reshape(-1)
print(predict_y)
mae = mean_absolute_error(time_y_test, predict_y)  # mean absolute error on the test set
rmse = np.sqrt(mean_squared_error(time_y_test, predict_y))  # root mean squared error on the test set
print("预测结果 mae:", mae, " RMSE:", rmse)
2.循环神经网络RNN
from keras.models import Sequential
from keras.layers import Dense,SimpleRNN
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Simple recurrent-network regressor on the "time" feature set.
train_x = time_x_train
train_y = time_y_train
test_x = time_x_test
test_y = time_y_test
# Min-max normalisation; the test set is scaled with the TRAINING min/max.
max_value = np.max(train_x)
min_value = np.min(train_x)
train_x = (train_x - min_value) / (max_value - min_value)
test_x = (test_x - min_value) / (max_value - min_value)
# Reshape to (samples, 1, features): one time step carrying all features.
# The feature count is read from the data instead of being hard-coded to 23.
n_features = train_x.shape[1]
train_x = train_x.reshape(-1, 1, n_features)
test_x = test_x.reshape(-1, 1, n_features)
print(train_x.shape, test_x.shape)
# Build the SimpleRNN model (swap the layer for LSTM / GRU to get those variants).
model = Sequential()
model.add(SimpleRNN(50, input_shape=(1, n_features)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
model.fit(train_x, train_y, epochs=50, batch_size=1)
# Flatten the predictions to 1-D before scoring.
predict_y = model.predict(test_x)
predict_y = predict_y.reshape(-1)
print(predict_y)
mae = mean_absolute_error(time_y_test, predict_y)  # mean absolute error on the test set
rmse = np.sqrt(mean_squared_error(time_y_test, predict_y))  # root mean squared error on the test set
print("预测结果 mae:", mae, " RMSE:", rmse)
3.长短期记忆神经网络LSTM
from keras.models import Sequential
from keras.layers import LSTM, Dense
将RNN中SimpleRNN改为LSTM即可
4.门控循环单元GRU
from keras.models import Sequential
from keras.layers import Dense,GRU
将RNN中SimpleRNN改为GRU即可