from sklearn.linear_model import Ridge, Lasso, ElasticNetCV
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from scipy.stats import pearsonr
import numpy as np
import pandas as pd
def lasso(x_train, y_train, x_test):
    """Grid-search a Lasso regressor on the training data and predict ``x_test``.

    ``alpha`` is selected by a 10-fold cross-validated ``GridSearchCV`` using
    its default scoring. The best cross-validation score and the winning
    parameter set are printed before the predictions are made.

    Args:
        x_train: Training feature matrix.
        y_train: Training targets.
        x_test: Feature matrix to predict for.

    Returns:
        The predictions of the fitted grid search for ``x_test``.
    """
    alpha_candidates = [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]
    search = GridSearchCV(Lasso(), {'alpha': alpha_candidates}, cv=10)
    search.fit(x_train, y_train)

    # Report the outcome of the search (best score, then best parameters).
    print("网格搜索最优得分:", search.best_score_)
    print("网格搜索最优参数组合:\n", search.best_params_)

    return search.predict(x_test)
def ridge(x_train, y_train, x_test):
    """Grid-search a Ridge regressor on the training data and predict ``x_test``.

    ``alpha`` is selected by a 10-fold cross-validated ``GridSearchCV`` using
    its default scoring. The best cross-validation score and the winning
    parameter set are printed before the predictions are made.

    Args:
        x_train: Training feature matrix.
        y_train: Training targets.
        x_test: Feature matrix to predict for.

    Returns:
        The predictions of the fitted grid search for ``x_test``.
    """
    grid = {'alpha': [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]}
    tuner = GridSearchCV(Ridge(), grid, cv=10)
    tuner.fit(x_train, y_train)

    # Report the outcome of the search (best score, then best parameters).
    print("网格搜索最优得分:", tuner.best_score_)
    print("网格搜索最优参数组合:\n", tuner.best_params_)

    predictions = tuner.predict(x_test)
    return predictions
def svr(x_train, y_train, x_test):
    """Grid-search a support vector regressor and predict ``x_test``.

    ``gamma`` (1..9) and ``C`` (1, 11, ..., 91) are selected by a 10-fold
    cross-validated ``GridSearchCV`` scored with R^2. The best
    cross-validation score and the winning parameter set are printed before
    the predictions are made.

    Args:
        x_train: Training feature matrix.
        y_train: Training targets.
        x_test: Feature matrix to predict for.

    Returns:
        The predictions of the fitted grid search for ``x_test``.
    """
    gamma_values = range(1, 10)
    c_values = range(1, 100, 10)
    search = GridSearchCV(
        SVR(),
        {'gamma': gamma_values, 'C': c_values},
        cv=10,
        scoring='r2',
    )
    search.fit(x_train, y_train)

    # Report the outcome of the search (best score, then best parameters).
    print("网格搜索最优得分:", search.best_score_)
    print("网格搜索最优参数组合:\n", search.best_params_)

    return search.predict(x_test)
# NOTE: everything between the two triple-quote markers below is a bare string
# literal, not live code: random_forest, AdaBoost, elastic and knn are NOT
# defined when this module runs. The string keeps disabled alternative
# regressors that follow the same (x_train, y_train, x_test) -> y_pred shape
# as lasso/ridge/svr above.
# NOTE(review): if any of these are re-enabled, verify the estimator arguments
# (e.g. `base_estimator`, `max_features='auto'`) against the installed
# scikit-learn version -- TODO confirm.
'''
def random_forest(x_train, y_train, x_test):
param_grid = {'n_estimators':range(100,1001,100),'max_features':['auto','sqrt','log2']} # 50,500,50
model = GridSearchCV(RandomForestRegressor(n_jobs = 24),param_grid,cv = 10,scoring = 'r2').fit(x_train,y_train)
y_pred = model.predict(x_test)
return y_pred
def AdaBoost(x_train, y_train, x_test):
param_0 = {'min_samples_leaf':range(1,5)}
base_model = GridSearchCV(DecisionTreeRegressor(),param_0,cv=10,scoring='r2')
base_model.fit(x_train,y_train)
base_samples_leaf = base_model.best_params_['min_samples_leaf']
param_grid = {'n_estimators':range(10,101,10),'loss':['square','linear','exponential']}
model = GridSearchCV(AdaBoostRegressor(base_estimator = DecisionTreeRegressor(min_samples_leaf=base_samples_leaf)),param_grid,cv = 10,scoring = 'r2').fit(x_train,y_train)
y_pred = model.predict(x_test)
return y_pred
def elastic(x_train, y_train, x_test):
model = ElasticNetCV(cv = 10, max_iter = 100000).fit(x_train,y_train)
y_pred = model.predict(x_test)
return y_pred
def knn(x_train, y_train, x_test):
param_grid = {'n_neighbors':range(2,20,2)}
model = GridSearchCV(KNeighborsRegressor(n_jobs = 24),param_grid,cv = 10,scoring = 'r2').fit(x_train,y_train)
y_pred = model.predict(x_test)
return y_pred
'''
def Kflodtrain(X,Y,fold,model_type):
    """Run one shuffled K-fold pass and collect held-out targets and predictions.

    For every fold, the selected model is tuned and fitted on the training
    part and used to predict the held-out part.

    Args:
        X: Feature array; it is indexed with the integer index arrays
            produced by ``KFold.split``.
        Y: Target array, indexed the same way as ``X``.
        fold: Number of folds passed to ``KFold`` as ``n_splits``.
        model_type: ``"LASSO"`` selects ``lasso``, ``'SVM'`` selects ``svr``;
            any other value falls back to ``ridge``.

    Returns:
        A tuple ``(y_test_total, y_pred_total)``: the held-out targets and the
        matching predictions, each concatenated over all folds along axis 0.
    """
    splitter = KFold(n_splits=fold, shuffle=True)

    # The model choice does not depend on the fold, so resolve it once.
    if model_type == "LASSO":
        fit_and_predict = lasso
    elif model_type == 'SVM':
        fit_and_predict = svr
    else:
        fit_and_predict = ridge

    held_out_targets = []
    fold_predictions = []
    for fit_idx, held_idx in splitter.split(X, Y):
        fit_x, fit_y = X[fit_idx], Y[fit_idx]
        held_x, held_y = X[held_idx], Y[held_idx]
        fold_predictions.append(fit_and_predict(fit_x, fit_y, held_x))
        held_out_targets.append(held_y)

    return (
        np.concatenate(held_out_targets, axis=0),
        np.concatenate(fold_predictions, axis=0),
    )
def main(data_path="C:/Users/24224/Desktop/diabetes.csv",
         output_dir="C:/Users/24224/Desktop/",
         fold=10,
         repeats=10):
    """Evaluate LASSO, ridge and SVM regressors by repeated K-fold CV.

    For each model type, ``Kflodtrain`` is run ``repeats`` times. After each
    run the Pearson correlation coefficient between the held-out targets and
    the predictions is recorded. The coefficients of one model type are
    written to ``<output_dir>/<model_type>PCC.xlsx`` in a single ``PCC``
    column.

    Args:
        data_path: CSV file with a header row. The first 10 columns are used
            as features and the column at index 10 as the target.
        output_dir: Directory that receives one Excel file per model type.
        fold: Number of folds handed to ``Kflodtrain``.
        repeats: Number of K-fold repetitions per model type.
    """
    import os  # local import: only needed to build the output path

    data = pd.read_csv(data_path, header=0).values
    x = data[:, :10]
    y = data[:, 10]

    for model_type in ['LASSO', 'ridge', 'SVM']:
        print(model_type)
        pcc = []
        for i in range(repeats):
            print("i=",i," start")
            y_test_total, y_pred_total = Kflodtrain(x, y, fold, model_type)
            # pearsonr returns a (coefficient, p-value) result; keep only the
            # coefficient so the PCC column holds plain numbers.
            pcc.append(pearsonr(y_test_total.ravel(), y_pred_total.ravel())[0])
            print("i=",i," end")

        res_pcc = pd.DataFrame({'PCC': pcc})
        out_path = os.path.join(output_dir, model_type + 'PCC.xlsx')
        res_pcc.to_excel(out_path, index=False)


if __name__ == "__main__":
    main()