基于DecisionTreeRegressor预测房地产市场价格的机器学习模型【python】

最新推荐文章于 2023-07-03 17:02:08 发布

摸鱼得鱼

最新推荐文章于 2023-07-03 17:02:08 发布

阅读量486

点赞数

文章标签： python 机器学习 sklearn

本文链接：https://blog.csdn.net/weixin_64275421/article/details/127192137

版权

get_x_y.py

import pandas as pd

from sklearn.model_selection import train_test_split

def get_xy(path='******'):
    """Load the housing CSV and split it into training and validation sets.

    Args:
        path: Location of the CSV file, passed straight to
            ``pandas.read_csv``. The default is the placeholder used by
            the original script; replace it or pass a real path.

    Returns:
        A list ``[train_X, val_X, train_y, val_y, X, y]``. ``X`` and ``y``
        are the full feature table and the ``Price`` column after rows
        with missing values were dropped; the first four entries are the
        ``train_test_split`` partitions of ``X`` and ``y`` made with
        ``random_state=0``.
    """
    # Read the data and drop every row that has a missing value in any
    # column (not only in the columns selected below).
    melbourne_data = pd.read_csv(path)
    filtered_melbourne_data = melbourne_data.dropna(axis=0)

    # Prediction target.
    y = filtered_melbourne_data.Price

    # Feature columns are looked up verbatim in the CSV header.
    # NOTE(review): 'Lattitude' / 'Longtitude' presumably match the
    # spelling used in the data file - verify before "correcting" them.
    melbourne_features = ['Rooms', 'Bathroom', 'Landsize', 'BuildingArea',
                          'YearBuilt', 'Lattitude', 'Longtitude']
    X = filtered_melbourne_data[melbourne_features]

    # Fixed seed so that every caller gets the same partition.
    train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=0)

    x_y = [train_X, val_X, train_y, val_y, X, y]

    return x_y

get_mae.py

from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    """Return the validation MAE of a tree limited to ``max_leaf_nodes`` leaves.

    A ``DecisionTreeRegressor`` (``random_state=0``) is fitted on
    ``train_X`` / ``train_y``; its predictions for ``val_X`` are then
    scored against ``val_y`` with ``mean_absolute_error``.
    """
    # Build and fit in one step; ``fit`` hands back the fitted estimator.
    regressor = DecisionTreeRegressor(
        max_leaf_nodes=max_leaf_nodes,
        random_state=0,
    ).fit(train_X, train_y)

    # Score the held-out predictions.
    return mean_absolute_error(val_y, regressor.predict(val_X))

get_min_max_leaf_nodes.py

from get_x_y import get_xy

from get_mae import get_mae

def get_min_mln(candidates=range(5, 5000)):
    """Search for the ``max_leaf_nodes`` value with the lowest validation MAE.

    Args:
        candidates: Iterable of ``max_leaf_nodes`` values to try. Defaults
            to ``range(5, 5000)``, the range of the original search (one
            tree is trained per candidate, so the default is slow).

    Returns:
        A list ``[best_x, min_mae, x_list, mae_list]``: the first candidate
        that reaches the minimum MAE, that minimum MAE, every candidate
        tried, and the MAE obtained for each candidate (same order).

    Raises:
        ValueError: If ``candidates`` is empty.
    """
    x_y = get_xy()
    train_X, val_X, train_y, val_y = x_y[:4]

    # Materialise once so generators work and indices line up with mae_list.
    x_list = list(candidates)
    if not x_list:
        raise ValueError("candidates must contain at least one value")

    # Validation MAE for every candidate leaf count.
    mae_list = [
        get_mae(x, train_X=train_X, val_X=val_X, train_y=train_y, val_y=val_y)
        for x in x_list
    ]

    # Best leaf count: the first candidate achieving the minimum MAE.
    min_mae = min(mae_list)
    num_min_mae = mae_list.count(min_mae)
    print(num_min_mae)  # how many candidates tie for the minimum
    best_x = x_list[mae_list.index(min_mae)]
    print(best_x, min_mae)

    return [best_x, min_mae, x_list, mae_list]

final_model.py

# The search helper lives in get_min_max_leaf_nodes.py ("nodes", not "notes").
from get_min_max_leaf_nodes import get_min_mln
from get_x_y import get_xy

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error

# Look up the best max_leaf_nodes (first element of the returned list).
get_list = get_min_mln()
min_mln = get_list[0]

# Define the final model.
# NOTE(review): the search in get_mae uses random_state=0 while this model
# uses random_state=1 - confirm whether the difference is intentional.
final_model = DecisionTreeRegressor(max_leaf_nodes=min_mln, random_state=1)

# Fit on the full data set (indices 4 and 5 are X and y).
X_y = get_xy()
final_model.fit(X_y[4], X_y[5])

# Predict on the same rows the model was fitted on, so the MAE printed
# here is an in-sample error, not a validation score.
pred = final_model.predict(X_y[4])
print(pred, mean_absolute_error(X_y[5], pred))