Python regression analysis: LASSO, Ridge, SVM, RF, K-fold, PCC

This post walks through regression model tuning with K-fold cross-validation: LASSO, Ridge, and support vector regression (SVR) are run, with random forest, AdaBoost, elastic net, and KNN variants kept as disabled alternatives. GridSearchCV searches for the best hyperparameters, the models are applied to a diabetes dataset, and performance is reported as the Pearson correlation coefficient (PCC) between the pooled out-of-fold predictions and the actual values.

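Before the full script, here is a minimal sketch of the same idea, assuming scikit-learn's bundled diabetes dataset (load_diabetes) as a stand-in for the local CSV used below: tune one model with GridSearchCV inside an outer K-fold loop, pool the out-of-fold predictions, and score them with the Pearson correlation coefficient.

# Minimal sketch (assumes sklearn's bundled diabetes data instead of the local CSV below)
import numpy as np
from scipy.stats import pearsonr
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, KFold

X, y = load_diabetes(return_X_y=True)
y_true_all, y_pred_all = [], []
for train_idx, test_idx in KFold(n_splits=10, shuffle=True).split(X):
    grid = GridSearchCV(Ridge(), {'alpha': [0.01, 0.1, 1, 10]}, cv=10)
    grid.fit(X[train_idx], y[train_idx])          # inner 10-fold CV picks alpha
    y_pred_all.append(grid.predict(X[test_idx]))  # out-of-fold predictions
    y_true_all.append(y[test_idx])
pcc, _ = pearsonr(np.concatenate(y_true_all), np.concatenate(y_pred_all))
print("PCC:", pcc)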

from sklearn.linear_model import Ridge, Lasso, ElasticNetCV
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from scipy.stats import pearsonr
import numpy as np
import pandas as pd


def lasso(x_train, y_train, x_test):
    param_grid = {'alpha':[0.0005,0.001,0.005,0.01,0.05,0.1,0.5,1,5]}
    model = GridSearchCV(Lasso(),param_grid,cv=10).fit(x_train,y_train)
    print("网格搜索最优得分:", model.best_score_)
    print("网格搜索最优参数组合:\n", model.best_params_)
    y_pred = model.predict(x_test)
    return y_pred

def ridge(x_train, y_train, x_test):
    param_grid = {'alpha':[0.0005,0.001,0.005,0.01,0.05,0.1,0.5,1,5]}
    model = GridSearchCV(Ridge(), param_grid, cv=10).fit(x_train, y_train)  # cv=10: 10-fold cross-validation
    print("Grid search best score:", model.best_score_)
    print("Grid search best parameters:\n", model.best_params_)
    y_pred = model.predict(x_test)
    return y_pred

def svr(x_train, y_train, x_test):
    param_grid = {'gamma':range(1,10),'C':range(1,100,10)}
    model = GridSearchCV(SVR(),param_grid,cv = 10,scoring='r2').fit(x_train,y_train)
    print("网格搜索最优得分:", model.best_score_)
    print("网格搜索最优参数组合:\n", model.best_params_)
    y_pred = model.predict(x_test)
    return y_pred
# Alternative models (random forest, AdaBoost, elastic net, KNN) are kept here but disabled:
'''
def random_forest(x_train, y_train, x_test):
    param_grid = {'n_estimators': range(100, 1001, 100),  # or a lighter 50,500,50 grid
                  'max_features': ['sqrt', 'log2', None]}  # 'auto' was removed for regressors in newer scikit-learn
    model = GridSearchCV(RandomForestRegressor(n_jobs = 24),param_grid,cv = 10,scoring = 'r2').fit(x_train,y_train)
    y_pred = model.predict(x_test)
    return y_pred


def AdaBoost(x_train, y_train, x_test):
    param_0 = {'min_samples_leaf':range(1,5)}
    base_model = GridSearchCV(DecisionTreeRegressor(),param_0,cv=10,scoring='r2')
    base_model.fit(x_train,y_train)
    base_samples_leaf = base_model.best_params_['min_samples_leaf']
    param_grid = {'n_estimators':range(10,101,10),'loss':['square','linear','exponential']}
    model = GridSearchCV(AdaBoostRegressor(estimator=DecisionTreeRegressor(min_samples_leaf=base_samples_leaf)),  # 'estimator' replaced 'base_estimator' in scikit-learn 1.2+
                         param_grid, cv=10, scoring='r2').fit(x_train, y_train)
    y_pred = model.predict(x_test)
    return y_pred
   
def elastic(x_train, y_train, x_test):
    model = ElasticNetCV(cv = 10, max_iter = 100000).fit(x_train,y_train)
    y_pred = model.predict(x_test)
    return y_pred

def knn(x_train, y_train, x_test):
    param_grid = {'n_neighbors':range(2,20,2)}
    model = GridSearchCV(KNeighborsRegressor(n_jobs = 24),param_grid,cv = 10,scoring = 'r2').fit(x_train,y_train)
    y_pred = model.predict(x_test)
    return y_pred
'''


def kfold_train(X, Y, fold, model_type):
    # Outer K-fold split; each fold's held-out predictions are pooled for scoring
    kfold = KFold(n_splits=fold, shuffle=True)
    y_test_total, y_pred_total = [],[]
    for train_index, test_index in kfold.split(X,Y):
        x_train, y_train = X[train_index], Y[train_index]
        x_test, y_test = X[test_index], Y[test_index]
        if model_type == "LASSO":
            y_pred_total.append(lasso(x_train,y_train,x_test))
        elif model_type == 'SVM':
            y_pred_total.append(svr(x_train,y_train,x_test))
        #elif model_type == "RF":
            #y_pred_total.append(random_forest(x_train,y_train,x_test))
        else:  # default: Ridge
            y_pred_total.append(ridge(x_train,y_train,x_test))
        y_test_total.append(y_test)

    y_test_total = np.concatenate(y_test_total,axis = 0)
    y_pred_total = np.concatenate(y_pred_total,axis = 0)
    return y_test_total, y_pred_total


def main():
    # Load the diabetes data from a local CSV: the first 10 columns are features, the last is the target
    diabetes = pd.read_csv("C:/Users/24224/Desktop/diabetes.csv", header=0).values
    x = diabetes[:,:10]
    y = diabetes[:,10]
    fold = 10
    for model_type in ['LASSO', 'ridge', 'SVM']:
        print(model_type)
        PCC = []
        y_test, y_pred = [], []

        for i in range(10):
            print("i=",i," start")
            y_test_total, y_pred_total = kfold_train(x, y, fold, model_type)
            y_test.append(y_test_total)
            y_pred.append(y_pred_total)
            PCC.append(pearsonr(y_test_total.ravel(), y_pred_total.ravel())[0])  # keep the correlation coefficient, drop the p-value
            print("i=",i," end")
        c = {'PCC':PCC}
        file_name = 'C:/Users/24224/Desktop/'
        res_pcc = pd.DataFrame(c)
        res_pcc.to_excel(file_name + model_type + 'PCC.xlsx', index=False)

if __name__ == "__main__":
    main()
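
As a small follow-up (a sketch, assuming the three *PCC.xlsx files written by main() above exist and that openpyxl is installed for pandas Excel I/O), the repeated-run PCC values can be read back and summarized per model:

# Sketch: summarize the 10 repeated PCC values saved by main() for each model
import pandas as pd

file_name = 'C:/Users/24224/Desktop/'
for model_type in ['LASSO', 'ridge', 'SVM']:
    pcc = pd.read_excel(file_name + model_type + 'PCC.xlsx')['PCC']
    print(model_type, "mean PCC:", round(pcc.mean(), 4), "std:", round(pcc.std(), 4))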