回归的stacking方法代码以及多输出回归的stacking代码方法

兔兔爱学习兔兔爱学习

于 2024-06-03 10:43:32 发布

阅读量815

点赞数 25

文章标签：回归数据挖掘人工智能

本文链接：https://blog.csdn.net/weixin_44245188/article/details/139408624

版权

import pandas as pd
import os
import datetime
path = os.path.dirname(os.path.realpath(file))
import matplotlib
import pylab as plt
plt.rcParams[‘font.sans-serif’] = [‘SimHei’] #显示中文
from matplotlib.font_manager import FontProperties
plt.rcParams[‘axes.unicode_minus’]=False
import numpy as np
import matplotlib
import pylab as plt
plt.rcParams[‘font.sans-serif’] = [‘SimHei’] #显示中文
from matplotlib.font_manager import FontProperties
plt.rcParams[‘axes.unicode_minus’]=False
import pandas as pd
#setup_seed(123)
import os
import joblib
import lightgbm as lgb
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import numpy as np
import joblib
import time
import optuna
from hyperopt import fmin, hp, partial, Trials, tpe,rand
from lightgbm import log_evaluation, early_stopping
path = os.path.dirname(os.path.realpath(file))
import warnings

设置警告过滤器以忽略特定警告

warnings.filterwarnings(“ignore”, message=“No further splits with positive gain”)

def make_shorttime_data(data):
print(“*******************”+“短期预测数据准备”+“**************************”)
###########短期预测-1.数据准备##########
train=data[‘2024-01-21 00:00:00’: ]
test=data[‘2024-05-01 00:15’:]
result=pd.DataFrame(columns=[‘实际功率’, ‘真实值’, ‘预测值’])
result[‘实际功率’]=test[‘实际功率’][‘2024-05-01 00:15’:]#[96+96-1:]
print(train.shape,test.shape)

x_train, y_train,x_test,y_test=train.iloc[:,1].values.reshape([-1,1]),train.iloc[:,0].values.reshape([-1,1]),test.iloc[:,1].values.reshape([-1,1]),test.iloc[:,0].values.reshape([-1,1])
y_train,y_test=y_train.ravel(),y_test.ravel()
print('x_train.shape',x_train.shape,'y_train.shape',y_train.shape)
print('x_test.shape',x_test.shape,'y_test.shape',y_test.shape)
return x_train, y_train,x_test,y_test,result

###########短期预测-2.模型定义##########
def short_stacking_model(x_train, y_train,x_test,y_test):
from mlxtend.regressor import StackingCVRegressor
from mlxtend.data import boston_housing_data
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# 初始化基模型
lr = LinearRegression()
svr_lin = SVR(kernel='linear', gamma='auto')
lasso =Lasso()
ridge = Ridge(random_state=2019,)
catboost=CatBoostRegressor(train_dir=path+r'/catboosttrain/',  
                    iterations=200, learning_rate=0.03,
                    depth=6, l2_leaf_reg=3,
                    loss_function='MAE',
                    eval_metric='MAE',
                    random_seed=2013,
                    verbose=0)#参数二model=MultiOutputRegressor(model)

lightgbm = LGBMRegressor(objective='regression',verbose=0)
   
models = [lr, svr_lin, lasso, ridge,catboost,lightgbm]
params = {'lasso__alpha': [0.1, 1.0, 10.0],
        'ridge__alpha': [0.1, 1.0, 10.0]}
sclf = StackingCVRegressor(regressors=models, meta_regressor=ridge)
#grid = GridSearchCV(estimator=sclf, param_grid=params, cv=5, refit=True)
#grid.fit(x_train, y_train)
#print(grid.best_score_, grid.best_params_)
sclf.fit(x_train, y_train) 
import joblib 
#save model
#model=grid(**grid.best_params_)
model=sclf 
print('5-fold cross validation scores:\n')
for clf, label in zip([lr, svr_lin, lasso, ridge,catboost,lightgbm,sclf], ['lr', 'svr_lin', 'lasso', 'ridge','catboost','lightgbm','sclf']):
    scores = cross_val_score(clf, x_train, y_train, cv=5, scoring='neg_mean_squared_error')
    print("Neg. MSE Score: %0.2f (+/- %0.2f) [%s]"%(scores.mean(), scores.std(), label))
return model

###########短期预测-3.模型训练与预测##########
def shorttime_model_make_result(x_train, y_train,x_test,y_test,model,modelname,Cap,result):
model.fit(x_train, y_train)
#modelname=str(model)#type(model).name
print(“=“+modelname+”=”)
#save model
joblib.dump(model, path+r’/model/‘+modelname+r’shorttime.pkl’)
#load model
model= joblib.load(path+r’/model/'+modelname+r’shorttime.pkl’)
x_test=x_test.reshape(-1,1)
y_pred = model.predict(x_test)
#print(y_test.shape,y_pred.shape)
y_test,y_pred=y_test.ravel(),y_pred.ravel()
#print(y_test.shape,y_pred.shape)

# 校正
for j in range(len(y_pred)):
    y_pred[j] = np.round(y_pred[j], 3)
    if y_pred[j] < 0:
        y_pred[j] = float(0)
    if y_pred[j]>Cap:
        y_pred[j]=Cap

#print(y_test.shape,y_pred.shape)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mean_squared_error(y_test,y_pred))
mae=mean_absolute_error(y_test,y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2score=r2_score(y_test, y_pred)
print('mse:',mse)
print('rmse:',rmse)
print('mae',mae)
print('mape',mape)
print('r2score',r2score)

#分辨率参数-dpi，画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(y_test,label="真实数据")
plt.plot(y_pred,label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"短期.png")
plt.close()
result['真实值']=y_test
result['预测值']=y_pred
result.to_csv(path+r"/result/"+modelname+"短期.csv", sep=',')

def make_ultrashorttime_data(data):
print(““+“超短期预测数据准备”+”********”)
###########超短期预测-1.数据准备##########
data= data[[“实际功率”,“预测风速”]]
x_=data.iloc[:,1].values.reshape([-1,1])
modelshorttime=joblib.load(path+r’/model/CatBoostRegressor_shorttime.pkl’)
data[“短期预测值”]=modelshorttime.predict(x_ )

data= data[["实际功率","预测风速","短期预测值",]]
#删除某行中某个值为0的行
data= data[data['实际功率'] != np.nan]
data=data.fillna(value='0')
train_=data['2024-01-21 00:00': ]
test_=data['2024-04-30 16:30':]
result_=pd.DataFrame(columns=['实际功率', '真实值'])
result_['实际功率']=test_['实际功率']['2024-05-01 00:15':]
result16=pd.DataFrame(columns=['实际功率', '真实值'])
result16['实际功率']=test_['实际功率']['2024-05-01 00:15':]
print(train_.shape,test_.shape)

num_nodes=3
def process(dataset):
    #观看过去时间窗口 过去多少天
    past_history_size = 16
    #预测未来值n天
    future_target = 16
    x = []
    y = []
    dataset=dataset.values
    for i in range(len(dataset)-past_history_size-future_target+1):
        x_1=dataset[i:i+past_history_size,0]
        x_2=dataset[i:i+past_history_size,1]
        x_3=dataset[i:i+past_history_size,2]
        x_4=dataset[i+past_history_size:i+past_history_size+future_target,-1]
        x_5=dataset[i+past_history_size:i+past_history_size+future_target,1]
        xxxxx=np.concatenate((x_1,x_2,x_3,x_4,x_5),axis=0) 
        x.append(xxxxx)
        y.append(dataset[i+past_history_size:i+past_history_size+future_target,0])
    x=np.array(x)
    y = np.array(y)   
    return x ,y 

train_x, train_y= process(train_)
test_x,test_y = process(test_)
print(train_x.shape, train_y.shape,test_x.shape,test_y.shape)
return train_x, train_y,test_x,test_y,result_,result16

###########超短期预测-2.模型定义##########

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import numpy as np

自定义多输出 StackingCVRegressor 变体

class MultiOutputStackingCVRegressor:
def init(self, base_estimators, meta_regressor, cv=5, random_state=42):
self.base_estimators = base_estimators
self.meta_regressor = meta_regressor
self.cv = cv
self.random_state = random_state

def fit(self, X, Y):
    kf = KFold(n_splits=self.cv, shuffle=True, random_state=self.random_state)
    n_targets = Y.shape[1]
    n_base_learners = len(self.base_estimators)
    meta_features = np.zeros((X.shape[0],  n_base_learners))

    for fold_idx, (train_index, test_index) in enumerate(kf.split(X)):
        for target in range(n_targets):
            for i, (_, estimator) in enumerate(self.base_estimators):
                estimator.fit(X[train_index], Y[train_index, target])
                meta_features[test_index,  i] = estimator.predict(X[test_index])
    # 确保Y的维度与构造的元特征维度相匹配，无需flatten
    self.meta_regressor.fit(np.hstack((X, meta_features)), Y)

    
def predict(self, X):
    # 对每个基础学习器预测，组合成元特征
    meta_features = np.column_stack([est.predict(X) for _, est in self.base_estimators])
    # 使用元学习器进行最终预测
    return self.meta_regressor.predict(np.column_stack((X, meta_features)))

def ultrashort_stacking_model(train_x, train_y,test_x,test_y):
# 假设其他必要的导入和变量如 CatBoostRegressor, LGBMRegressor 等已正确定义

base_learners = [
    ('lr', LinearRegression()),
    ('ridge', Ridge()),
    # 注意：下面的 CatBoostRegressor 和 LGBMRegressor 应当正确导入并初始化
    ('catboost',CatBoostRegressor(train_dir=path+r'/catboosttrain/', verbose=0)), 
    ('lightgbm',LGBMRegressor(objective='regression', verbose=-1))
]

meta_learner = LinearRegression()
# 实例化自定义的多输出 Stacking 模型
stacking_regressor = MultiOutputStackingCVRegressor(base_learners, meta_learner, cv=5)

# 训练模型
#stacking_regressor.fit(train_x, train_y)
# 预测并评估
#print("X_test.shape",X_test.shape)#(20, 10)
#Y_pred = stacking_regressor.predict(test_x)
#mse = mean_squared_error(test_y, Y_pred)
#print(f"Mean Squared Error: {mse}")

return stacking_regressor

###########超短期预测-3.模型训练与预测-输出最后1个点情况##########
def ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,model,modelname,Cap,result_):

model.fit(train_x, train_y)
print("================="+modelname+"=================")
#save model
joblib.dump(model, path+r'/model/'+modelname+r'_ultrashorttime.pkl')
#load model
model_= joblib.load(path+r'/model/'+modelname+r'_ultrashorttime.pkl')

pred_y = model_.predict(test_x)
test_y,pred_y=test_y[:,-1],pred_y[:,-1]
print(test_y.shape,pred_y.shape)
    # 校正
for j in range(len(pred_y)):
    pred_y[j] = np.round(pred_y[j], 3)
    if pred_y[j] < 0:
        pred_y[j] = float(0)
    if pred_y[j]>Cap:
        pred_y[j]=Cap
        
mse=mean_squared_error(test_y,pred_y)
rmse=np.sqrt(mean_squared_error(test_y,pred_y))
mae=mean_absolute_error(test_y,pred_y)
mape = mean_absolute_percentage_error(test_y, pred_y)
r2score=r2_score(test_y, pred_y)
print('mse:',mse)
print('rmse:',rmse)
print('mae',mae)
print('mape',mape)
print('r2score',r2score)

#分辨率参数-dpi，画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(test_y.ravel(),label="真实数据")
plt.plot(pred_y.ravel(),label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"超短期.png")
plt.close()
result_['真实值']=test_y
result_['预测值']=pred_y
result_.to_csv(path+r"/result/"+modelname+"超短期.csv", sep=',')

modelname=[“Catboost”,“Lightgbm”,“Ridge”]
‘’’
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,catboost_model,modelname[0],Cap)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,lightgbm_model,modelname[1],Cap)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,ridge_model,modelname[2],Cap)
‘’’

###########超短期预测-4.模型训练与预测-输出16个点情况##########
#########按照要求模型预测16个点，但是为了便于部署模型预测出相应的时间点，在实际中，通常多预测出来一个点，变为预测17个点
#print(““+“超短期16个点预测开始”+”********”)
def ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,model,modelname,Cap,result16):
model.fit(train_x, train_y)
print(“=“+modelname+”=”)
#save model
joblib.dump(model, path+r’/model/‘+modelname+r’ultrashorttime.pkl’)
#load model
model= joblib.load(path+r’/model/'+modelname+r’ultrashorttime.pkl’)
pred_y = model.predict(test_x)
test_y_,pred_y_=test_y[:,-1],pred_y[:,-1]
print(test_y_.shape,pred_y_.shape)
mse=mean_squared_error(test_y_,pred_y_)
rmse=np.sqrt(mean_squared_error(test_y_,pred_y_))
mae=mean_absolute_error(test_y_,pred_y_)
mape = mean_absolute_percentage_error(test_y_, pred_y_)
r2score=r2_score(test_y_, pred_y_)
print(‘mse:’,mse)
print(‘rmse:’,rmse)
print(‘mae’,mae)
print(‘mape’,mape)
print(‘r2score’,r2score)

#分辨率参数-dpi，画布大小参数-figsize
#plt.figure(dpi=300,figsize=(24,8))
plt.title(modelname+str("预测结果"))
plt.plot(test_y_.ravel(),label="真实数据")
plt.plot(pred_y_.ravel(),label="预测值")
plt.legend(loc=1)
plt.savefig(path+r"/pictures/"+modelname+"超短期16个点取最后一个点画图.png")
plt.close()

def correction(jj):
    for j in range(len(pred_y[:,jj])):
        pred_y[:,jj][j] = np.round(pred_y[:,jj][j], 3)
        if pred_y[:,jj][j] < 0:
            pred_y[:,jj][j] = float(0)
        if pred_y[:,jj][j]>Cap:
            pred_y[:,jj][j]=Cap

for j in range(16):
    correction(j)
   
result16['真实值']=test_y_
for i in range(16):
    result16['预测值'+str(i)]=pred_y[:,i]

result16.to_csv(path+r"/result/"+modelname+"超短期16个点.csv", sep=',')

modelname=[“Catboost”,“Lightgbm”,“Ridge”]
‘’’
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,catboost_model,modelname[0],Cap)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,lightgbm_model,modelname[1],Cap)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,ridge_model,modelname[2],Cap)
‘’’

if name == “main”:
###########场站数据##########
Cap=17
data=pd.read_csv(path+r"/data/with_nwp2024-05-14.csv", parse_dates=True, index_col=‘时间’)
data = data.sort_index()
print(“场站数据:”,data.shape)
print(data.head())
print(“=场站数据相关性=”)
print(data.corr())
data.plot(figsize=(24,10))
plt.savefig(path+r"/pictures/with_nwp.png")
plt.close()
data[‘实际功率’]=data[‘实际功率’].map(lambda x: x if x> 0 else 0)
data= data[[“实际功率”,“预测风速”]]
#删除某行中某个值为0的行
data= data[data[‘实际功率’] != np.nan]
data=data.fillna(value=‘0’)

'''
print("===========================生成短期预测数据集==========================================")
#生成短期预测数据集
x_train, y_train,x_test,y_test,result=make_shorttime_data(data)

print("=========================短期预测stacking方法===========================")
print("1.短期预测stacking方法预测结果:")
short_stacking=short_stacking_model(x_train, y_train,x_test,y_test)
shorttime_model_make_result(x_train, y_train,x_test,y_test,short_stacking,"short_stacking",Cap,result)
''' 
 
print("===========================生成超短期预测数据集==========================================")
#生成超短期预测数据集
train_x, train_y,test_x,test_y,result_,result16=make_ultrashorttime_data(data)

print("=========================超短期预测stacking方法===========================")
print("2.超短期预测stacking方法预测结果:")
ultrashort_stacking=ultrashort_stacking_model(train_x, train_y,test_x,test_y)
ultrashorttime_model_make_result(train_x, train_y,test_x,test_y,ultrashort_stacking,"ultrashort_stacking",Cap,result_)
ultrashorttime_model_make_result_16output(train_x, train_y,test_x,test_y,ultrashort_stacking,"ultrashort_stacking",Cap,result16)

兔兔爱学习兔兔爱学习

关注

25
点赞
踩
17

收藏

觉得还不错? 一键收藏
0
评论
回归的stacking方法代码以及多输出回归的stacking代码方法

definit# 确保Y的维度与构造的元特征维度相匹配，无需flatten# 对每个基础学习器预测，组合成元特征# 使用元学习器进行最终预测# 假设其他必要的导入和变量如 CatBoostRegressor, LGBMRegressor 等已正确定义# 注意：下面的 CatBoostRegressor 和 LGBMRegressor 应当正确导入并初始化# 实例化自定义的多输出 Stacking 模型# 训练模型# 预测并评估。
复制链接

扫一扫