2023年美赛春季赛Y题数据及代码参考

 

 

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, KFold
import pickle
import os
import joblib


def train_regression_model(
    X: pd.DataFrame, y: pd.DataFrame, seed: int = 42
) -> None:
    """
    Trains a random-forest regression pipeline to predict the asking
    price of a used sailboat given sailboat features, then saves it.

    The pipeline standardizes the features and fits a 700-tree
    ``RandomForestRegressor``. It is fitted on the full training set,
    scored with shuffled 5-fold cross-validation, and written to
    ``models/regression_model.compressed`` (relative to the current
    working directory) with joblib compression.

    Parameters
    ----------
    X : pd.DataFrame
        Training features, one row per record.
    y : pd.DataFrame
        Training target. Its values are flattened to 1-D before fitting,
        so it is presumably a single-column frame -- extra columns would
        make the flattened length disagree with ``X``.
    seed : int, default 42
        Random state shared by the K-fold splitter and the forest.

    Returns
    -------
    None
        The metrics are printed and the fitted pipeline is saved to disk.
    """

    # Kept for output compatibility: emits one blank line before training.
    print()

    # Flatten the target once; both the fit and the CV run reuse it.
    target = np.ravel(y.values)

    cv = KFold(5, shuffle=True, random_state=seed)

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('model', RandomForestRegressor(n_estimators=700, random_state=seed)),
    ])

    # cross_val_score fits clones of the pipeline, so the model fitted
    # here on the full training set is the one that gets saved below.
    pipe.fit(X, target)
    train_scores = cross_val_score(pipe, X, target, cv=cv)

    metrics = {
        # Mean of the per-fold scores (the estimator's default scorer).
        'CV-R2': round(np.mean(train_scores), 3),
        # In-sample error: predictions are made on the data used to fit.
        'RMSE': np.sqrt(mean_squared_error(y, pipe.predict(X)))
    }
    print("Performance on Training Data:\n")
    print(f"{metrics}\n")

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("models", exist_ok=True)

    joblib.dump(pipe, 'models/regression_model.compressed', compress=True)


if __name__ == "__main__":
    import argparse

    # Command-line entry point: takes the paths of the feature and target
    # CSV files as two positional arguments and trains the model on them.
    parser = argparse.ArgumentParser()
    # Raw strings keep the literal backslash in the help text without
    # relying on the invalid escape sequences "\X" and "\y".
    parser.add_argument('xtrain', help=r'data\X_train.csv')
    parser.add_argument('ytrain', help=r'data\y_train.csv')

    args = parser.parse_args()

    # index_col=False tells pandas not to use the first column as the index.
    X_TRAIN = pd.read_csv(args.xtrain, index_col=False)
    Y_TRAIN = pd.read_csv(args.ytrain, index_col=False)

    train_regression_model(X_TRAIN, Y_TRAIN)

 需要的后台私我哦~

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值