sklearn下回归预测结果与测试集拟合曲线图绘制

使用sklearn回归预测结果与测试集拟合曲线图绘制

以上文用过的随机森林模型为例,在函数中需要增加的有三项
1.使用测试集的X特征值算出你要预测的特征值,存到y_predict中(这个变量取什么名字随便)
y_predict = model.predict(X_test)
2.在返回中加入两项
"predict": y_predict,  # 为刚才的预测值
"y_test": y_test  # 为测试集中想要预测特征值的真实值

import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split

from application import logger
from application.utils import ModelScoreUtil


def train(X_data, y_data):
    """Train a RandomForestRegressor with grid-searched hyper-parameters.

    :param X_data: feature matrix (array-like / DataFrame)
    :param y_data: target values (array-like / Series)
    :return: dict with keys:
        - "model": the fitted GridSearchCV wrapper
        - "best_param": best hyper-parameters found by the grid search
        - "model_score_check_result": scoring summary from ModelScoreUtil
        - "predict": predictions on the held-out test split
        - "y_test": true target values of the held-out test split
    """
    # Split the data: 80% train / 20% test, fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=42)
    # Build the model and search the hyper-parameter grid.
    model = GridSearchCV(
        estimator=RandomForestRegressor(),
        param_grid={
            'max_depth': range(20, 40, 5),
            'n_estimators': range(200, 600, 100),
        },
        verbose=4,
        n_jobs=6,
    )
    logger.info("训练模型:开始")
    model.fit(X_train, y_train)
    logger.info("训练模型:完成")
    # Predictions on the held-out split, returned so callers can plot
    # predicted-vs-true fit curves.
    y_predict = model.predict(X_test)
    # Score the fitted model on the held-out data.
    model_score_check_result = ModelScoreUtil.check(model=model, X_test=X_test, y_test=y_test)
    logger.info(f"模型评分结果: \n {pd.DataFrame([model_score_check_result])}")
    return {
        "model": model,
        "best_param": model.best_params_,
        "model_score_check_result": model_score_check_result,
        "predict": y_predict,
        "y_test": y_test,
    }


if __name__ == '__main__':

    # Nothing runs when this module is executed directly; it is meant to
    # be imported for its train() function.
    pass

再看主函数中怎么调用这两项predict和y_test画图
画图部分的代码如下

    # Create the figure.  NOTE(review): figsize is in inches, so (200, 3)
    # is an extremely wide canvas — confirm this was not meant to be dpi.
    fig = plt.figure(figsize=(200, 3))  # the dpi parameter sets the resolution (pixels per inch), default 80
    axes = fig.add_subplot(1, 1, 1)  # single subplot to draw on
    # Blue dashed line: predicted values at x positions 0..len(predict)-1.
    line1, = axes.plot(range(len(predict)), predict, 'b--', label='predict', linewidth=2)
    # Green solid line: true test-set values over the same x range.
    line3, = axes.plot(range(len(predict)), y_test, 'g', label='true')
    axes.grid()
    fig.tight_layout()
    plt.legend(handles=[line1, line3])
    plt.title('拟合曲线')
    plt.show()
    #
    pass

.plot(x, y, ls="-", lw=2, label="plot figure")

x: x轴上的数值

y: y轴上的数值

ls:折线图的线条风格

lw:折线图的线条宽度

label:标记图内容的标签文本

如对曲线line1
x:range(len(predict)) #为给预测值从0开始编的序号
y:predict #为预测值
颜色:b--
label:predict
linewidth:2

line1为特征值预测值的曲线,line3为实际值的曲线
line1, = axes.plot(range(len(predict)), predict, 'b--', label='predict', linewidth=2)
line3, = axes.plot(range(len(predict)), y_test, 'g', label='true')
plt.legend(handles=[line1, line3])

# encoding=utf-8
import datetime
import pickle

from application import logger, model_algorithm
from application.data_source.ds_model_station_supply_water_temper import query_model_station_supply_water_temper
from application.model_algorithm.outlier.iforest import isolation_forest
from application.utils import ModelStorePathUtil, MySQLUtils
import matplotlib.pyplot as plt


def create(model_name, station_id, start_time, end_time):
    """
    Create model: secondary supply water temperature.

    Queries history data for the station, removes outliers with an
    isolation forest, trains a random-forest regressor, persists the
    fitted model with pickle, marks the model as created in MySQL, and
    finally plots the predicted-vs-true fit curve for the test split.

    :param model_name: display/storage name of the model
    :param station_id: station identifier used in the query and the UPDATE
    :param start_time: history window start, 'YYYY-MM-DD HH:MM:SS'
    :param end_time: history window end, 'YYYY-MM-DD HH:MM:SS'
    :return: None (logs and returns early when no data is available)
    """
    # Query raw history data.
    data = query_model_station_supply_water_temper(station_id=station_id, start_time=start_time, end_time=end_time)
    logger.info(f"二次供温历史数据:\n {data}")
    logger.info(f"二次供温历史数据:\n {data.columns}")
    # Keep only the columns the model uses.
    data = data[[
        "pre_time",
        "outside_temper",
        "outside_weather",
        # "outside_humidity",
        "outside_wind",
        "outside_wind_speed",
        "average_inside_temper",
        "supply_water_temper",
        # "water_deviation_temper"
    ]]
    # Empty dataset: log and bail out instead of raising.
    if len(data) == 0:
        return logger.info(f"没有可用的数据集:{model_name}, {station_id}, {start_time}, {end_time}")
    # Find and drop outlier rows via isolation forest, then re-index.
    iForest_index, outlier_label = isolation_forest(data=data)
    data = data.drop(index=iForest_index)
    data = data.reset_index(drop=True)
    # Feature matrix / target split.
    X_data, y_data = data[[
        "pre_time",
        "outside_temper",
        "outside_weather",
        # "outside_humidity",  # humidity dropped
        "outside_wind",
        "outside_wind_speed",
        "average_inside_temper",
        # "water_deviation_temper"
    ]], data['supply_water_temper']
    # ==============================================================================================================
    # Train (random forest; "lightGBM" variable names kept from an earlier version)
    # ==============================================================================================================
    train_result_lightGBM: dict = model_algorithm.huak_rf.train(
        X_data=X_data, y_data=y_data
    )
    logger.info(f"训练结果: \n {train_result_lightGBM}")
    model_lightGBM = train_result_lightGBM.get("model")
    best_param_lightGBM = train_result_lightGBM.get("best_param")
    predict = train_result_lightGBM.get("predict")
    y_test = train_result_lightGBM.get("y_test")
    model_score_check_result_lightGBM = train_result_lightGBM.get("model_score_check_result")
    # Model name.
    model_name = str(model_name)
    logger.info(f"模型名称: {model_name}")
    # Model store paths: a "latest" path and one keyed by the time window.
    model_store_path_01 = ModelStorePathUtil.get_model_path_by_model_name_type2(model_name=model_name)
    logger.info(f"模型存储路径[model_store_path_01]: {model_store_path_01}")
    model_store_path_02 = ModelStorePathUtil.get_model_path_by_model_name_type2_with_start_end_time(
        model_name=model_name, start_time=start_time, end_time=end_time
    )
    logger.info(f"模型存储路径[model_store_path_02]: {model_store_path_02}")
    # Persist the model; `with` guarantees the file handles are closed
    # (the original bare open(...) calls leaked them).
    with open(model_store_path_01, "wb") as model_file:
        pickle.dump(model_lightGBM, model_file)
    with open(model_store_path_02, "wb") as model_file:
        pickle.dump(model_lightGBM, model_file)
    # Mark the model as created in MySQL.
    Time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): SQL built by string interpolation — station_id is
    # injectable; switch to a parameterized query if MySQLUtils supports one.
    sql = f"UPDATE sn_control_hotstation_model " \
          f"SET is_created=1 ,last_model_update_time= '{Time}' " \
          f"WHERE model_type=2 AND station_id ='{station_id}' "
    logger.info(f"更新语句: {sql}")
    result = MySQLUtils.execute(query=sql)
    logger.info(f"更新结果: {result}")
    logger.info(f"模型存储完毕: {model_name}")
    # Plot predicted vs. true values for the test split.
    logger.info(f"绘制{model_name}结果拟合图")
    # NOTE(review): figsize is in inches, so (200, 3) is an extremely wide
    # canvas — confirm this was not meant to be dpi.
    fig = plt.figure(figsize=(200, 3))
    axes = fig.add_subplot(1, 1, 1)
    line1, = axes.plot(range(len(predict)), predict, 'b--', label='predict', linewidth=2)
    line3, = axes.plot(range(len(predict)), y_test, 'g', label='true')
    axes.grid()
    fig.tight_layout()
    plt.legend(handles=[line1, line3])
    plt.title('拟合曲线')
    plt.show()


if __name__ == '__main__':
    # Manual smoke run: train and store a model for one station over a
    # one-month history window (IDs/dates are sample values).
    create(model_name="政泉花园二期北区-低区", station_id="1552",
           start_time="2020-11-01 00:00:00",
           end_time="2020-12-01 00:00:00")
    pass

拟合曲线如下
最终拟合曲线如下

  • 5
    点赞
  • 33
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
好的,以下是绘制逻辑回归ROC曲线的Python代码示例: ```python import numpy as np import matplotlib.pyplot as plt from sklearn.metrics import roc_curve, auc from sklearn.linear_model import LogisticRegression from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split # 生成具有2个特征的二分类数据集 X, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, random_state=0) # 将数据集分为训练集和测试集 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) # 创建逻辑回归模型并拟合训练数据 lr = LogisticRegression() lr.fit(X_train, y_train) # 预测测试集中的类别概率 y_score = lr.predict_proba(X_test)[:, 1] # 计算ROC曲线的各项指标 fpr, tpr, thresholds = roc_curve(y_test, y_score) roc_auc = auc(fpr, tpr) # 绘制ROC曲线 plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc) plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic example') plt.legend(loc="lower right") plt.show() ``` 这段代码将生成一个具有2个特征的二分类数据集,并使用逻辑回归模型拟合训练数据。然后,通过预测测试集中的类别概率计算ROC曲线的各项指标,并使用Matplotlib库绘制ROC曲线。最终的ROC曲线将显示在一个新的形窗口中。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值