python sklearn 随机森林

python sklearn 随机森林

文件下载地址
链接: https://pan.baidu.com/s/1dAnxc-6EaP9sUfyXtnUy7A 提取码: 9pf8

"""
    随机森林
"""
import numpy as np
import sklearn.utils as su
import sklearn.ensemble as se
import matplotlib.pyplot as mp
import sklearn.metrics as sm


def train_day(path="bike_day.csv"):
    """Fit a random forest regressor on the daily bike CSV and report its fit.

    The file is loaded as text with the first row treated as the header.
    Columns 2 through 12 (11 columns) are used as features and the last
    column as the regression target.  The rows are shuffled with a fixed
    seed, the first 90% are used for training and the remaining 10% for
    evaluation.  R2 and mean absolute error on the held-out rows are printed.

    Args:
        path: Location of the daily CSV file.  Defaults to ``bike_day.csv``
            in the current working directory.  (The previous hard-coded
            ``.\\bike_day.csv`` only resolved on Windows.)

    Returns:
        A ``(feature_importances, header_names)`` tuple: the fitted model's
        ``feature_importances_`` array and the 1-D array of the matching
        feature column names.
    """
    # 1. Load the data; everything is read as strings so the header row
    #    can live in the same array as the numeric rows.
    lines = np.loadtxt(path, delimiter=',', dtype='str')
    # Indexing row 0 directly yields a 1-D array, so no reshape is needed.
    header_name = lines[0, 2:13]
    x = lines[1:, 2:13].astype('float')
    y = lines[1:, -1].astype('float')
    print(header_name.shape, header_name.dtype)
    print(x.shape, x.dtype)
    print(y.shape, y.dtype)
    print(header_name)
    # 2. Split into training and validation sets (90% / 10%) after a
    #    seeded shuffle.
    x, y = su.shuffle(x, y, random_state=7)
    train_size = int(len(x) * 0.9)
    train_x, train_y = x[:train_size], y[:train_size]
    test_x, test_y = x[train_size:], y[train_size:]
    # 3. Train the random forest model.
    #    NOTE(review): no random_state is passed to the forest, so the
    #    printed scores and the importances can differ between runs.
    model = se.RandomForestRegressor(
        max_depth=10, n_estimators=1000, min_samples_split=2)
    model.fit(train_x, train_y)
    predict_test_y = model.predict(test_x)
    # 4. Evaluate the model on the held-out rows.
    print(sm.r2_score(test_y, predict_test_y))
    print(sm.mean_absolute_error(test_y, predict_test_y))

    day_feature = model.feature_importances_
    return day_feature, header_name


def train_hour(path="bike_hour.csv"):
    """Fit a random forest regressor on the hourly bike CSV and report its fit.

    The file is loaded as text with the first row treated as the header.
    Columns 2 through 13 (12 columns) are used as features and the last
    column as the regression target.  The rows are shuffled with a fixed
    seed, the first 90% are used for training and the remaining 10% for
    evaluation.  R2 and mean absolute error on the held-out rows are printed.

    Args:
        path: Location of the hourly CSV file.  Defaults to ``bike_hour.csv``
            in the current working directory.  (The previous hard-coded
            ``.\\bike_hour.csv`` only resolved on Windows.)

    Returns:
        A ``(feature_importances, header_names)`` tuple: the fitted model's
        ``feature_importances_`` array and the 1-D array of the matching
        feature column names.
    """
    # 1. Load the data; everything is read as strings so the header row
    #    can live in the same array as the numeric rows.
    lines = np.loadtxt(path, delimiter=',', dtype='str')
    # Indexing row 0 directly yields a 1-D array, so no reshape is needed.
    header_name = lines[0, 2:14]
    x = lines[1:, 2:14].astype('float')
    y = lines[1:, -1].astype('float')
    print(header_name.shape, header_name.dtype)
    print(x.shape, x.dtype)
    print(y.shape, y.dtype)
    print(header_name)

    # 2. Split into training and validation sets (90% / 10%) after a
    #    seeded shuffle.
    x, y = su.shuffle(x, y, random_state=7)
    train_size = int(len(x) * 0.9)
    train_x, train_y = x[:train_size], y[:train_size]
    test_x, test_y = x[train_size:], y[train_size:]

    # 3. Train the random forest model.
    #    NOTE(review): no random_state is passed to the forest, so the
    #    printed scores and the importances can differ between runs.
    model = se.RandomForestRegressor(
        max_depth=10, n_estimators=1000, min_samples_split=2)
    model.fit(train_x, train_y)
    predict_test_y = model.predict(test_x)

    # 4. Evaluate the model on the held-out rows.
    print(sm.r2_score(test_y, predict_test_y))
    print(sm.mean_absolute_error(test_y, predict_test_y))
    hour_feature = model.feature_importances_
    return hour_feature, header_name


def draw_result(day_feature, day_header_name, hour_feature, hour_header_name):
    """Plot the day and hour feature importances as two stacked bar charts.

    Each panel shows the importances sorted in descending order, with the
    x tick labels reordered to match.  The day panel is drawn on top and the
    hour panel below it; the figure is then displayed with ``mp.show()``.

    Args:
        day_feature: 1-D array of feature importances for the day model.
        day_header_name: Array of feature names aligned with ``day_feature``.
        hour_feature: 1-D array of feature importances for the hour model.
        hour_header_name: Array of feature names aligned with ``hour_feature``.
    """
    mp.figure("Random Forest", facecolor="lightgray")

    # One entry per panel:
    # (subplot position, title, importances, names, bar colour, legend label)
    panels = (
        (211, "Day Feature importance", day_feature, day_header_name,
         "dodgerblue", "day Feature importance"),
        (212, "Hour Feature importance", hour_feature, hour_header_name,
         "orangered", "Hour Feature importance"),
    )

    for position, title, importance, names, colour, legend_label in panels:
        mp.subplot(position)
        mp.title(title, fontsize=16)
        mp.ylabel("Feature importance", fontsize=14)
        slots = np.arange(importance.size)

        # Rank the features from most to least important.
        ranking = importance.argsort()[::-1]
        ranked_importance = importance[ranking]

        mp.bar(slots, ranked_importance, 0.8, color=colour, label=legend_label)
        mp.grid(linestyle=":", axis="y")
        mp.xticks(slots, names[ranking])
        mp.legend()
        mp.tight_layout()

    mp.show()


if __name__ == '__main__':
    # Fit one forest on the daily data and one on the hourly data.
    day_importance, day_names = train_day()
    hour_importance, hour_names = train_hour()

    # Show both feature-importance rankings in a single figure.
    draw_result(day_importance, day_names, hour_importance, hour_names)

(运行结果图:上图为 day 数据集按重要性降序排列的特征重要性柱状图,下图为 hour 数据集的特征重要性柱状图)

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

廷益--飞鸟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值