界面设计完成之后,需要为各个控件关联事件。完成如下所示的模型评估GUI界面看似简单,实际上仍需要做大量重复工作,比如各个分类、回归算法的参数设计,以及这些算法的评估方式和绘图形式。这里先做一个约定:针对结构化数据集,数据格式应为第一列是样本标识,中间各列是特征,最后一列是预测目标。单击“打开训练数据”按钮后会展示数据集,同时相关特征会显示在“描述子选择”树形控件中,可供选择。详细操作见《教你用PyQt一步步打造一个属于自己的机器学习GUI-数据加载展示-03》。
import csv
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc, \
precision_recall_curve
from sklearn.model_selection import cross_val_score, train_test_split, LeaveOneOut, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from sklearn.metrics import *
class ModelEvaluation(object):
    """Evaluate a binary classifier on one feature matrix / target pair.

    The instance stores the data once; each ``*_task`` method then fits the
    supplied model, computes classification metrics and, unless exporting is
    disabled, writes the curve data to CSV files named after the model class.

    Models passed to the evaluation methods must expose ``fit``, ``predict``
    and ``predict_proba``. Column 1 of ``predict_proba`` is used as the score
    of the positive class.
    """

    # Directory the CSV exports were originally hard-coded to. It stays the
    # default so existing callers keep producing the same files.
    DEFAULT_OUTPUT_DIR = "D://pycharm_project//Machine_Learning_GUI//MachineLearning_GUI_Design//datas"

    def __init__(self, X, y, output_dir=DEFAULT_OUTPUT_DIR):
        """Store the data set and the export location.

        Args:
            X: Feature matrix (anything ``train_test_split`` / ``np.array``
                accept).
            y: Target labels, aligned with the rows of ``X``.
            output_dir: Directory the CSV exports are written to. The
                directory is not created here; a missing directory makes
                ``DataFrame.to_csv`` raise ``OSError``. Pass ``None`` to skip
                all CSV exports.
        """
        self.X = X
        self.y = y
        self.output_dir = output_dir

    def _export_csv(self, frame, prefix, model):
        """Write ``frame`` to ``<output_dir>/<prefix>_<ModelClass>.csv``."""
        if self.output_dir is None:
            return
        import os  # local import: keeps the file's import block untouched

        file_name = "%s_%s.csv" % (prefix, model.__class__.__name__)
        frame.to_csv(os.path.join(self.output_dir, file_name))

    def holdout_binary_task(self, model, test_size=0.3):
        """Fit ``model`` on a train split and score it on the held-out split.

        The split uses ``random_state=42`` so repeated calls are reproducible.

        Args:
            model: Unfitted (or refittable) binary classifier.
            test_size: Share of the samples held out for testing.

        Returns:
            A one-row ``DataFrame`` with the scalar metrics (``accuracy``,
            ``precision_``, ``recall_``, ``F1-score``, ``AUC-ROC``,
            ``PR-AUC``) and the curve arrays (``fpr``, ``tpr``,
            ``precision``, ``recall``) stored as object cells.

        Side effects:
            Exports ``holdout_roc_*``, ``holdout_pr_*`` and ``holdout_*`` CSV
            files unless ``output_dir`` is ``None``.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1]

        fpr, tpr, _ = roc_curve(y_test, y_prob)
        precision, recall, _ = precision_recall_curve(y_test, y_prob)

        # DataFrame.append was removed in pandas 2.0; building the frame from
        # a one-element list of dicts yields the same single-row result.
        metrics_dict = pd.DataFrame([{
            'accuracy': accuracy_score(y_test, y_pred),
            'precision_': precision_score(y_test, y_pred),
            'recall_': recall_score(y_test, y_pred),
            'F1-score': f1_score(y_test, y_pred),
            'AUC-ROC': auc(fpr, tpr),
            "PR-AUC": auc(recall, precision),
            "fpr": fpr,
            "tpr": tpr,
            "precision": precision,
            "recall": recall,
        }])

        self._export_csv(
            pd.DataFrame({"fpr": fpr, "tpr": tpr}), "holdout_roc", model)
        self._export_csv(
            pd.DataFrame({"precision": precision, "recall": recall}),
            "holdout_pr", model)
        self._export_csv(metrics_dict, "holdout", model)
        return metrics_dict

    def loocv_binary_task(self, model):
        """Score ``model`` with leave-one-out cross-validation.

        The model is fitted once per fold; the prediction and the
        positive-class probability for the held-out sample both come from
        that single fit.

        Args:
            model: Unfitted (or refittable) binary classifier.

        Returns:
            The tuple ``(roc_auc, pr_auc, accuracy, precision_, recall_, f1,
            fpr, tpr, precision, recall)``. The first six entries are scalars
            pooled over all folds, the last four are the curve arrays.

        Side effects:
            Exports ``loocv_roc_*`` and ``loocv_pr_*`` CSV files unless
            ``output_dir`` is ``None``.
        """
        X_all = np.array(self.X)
        y_all = np.array(self.y)

        y_true = []
        y_pred = []
        y_prob = []
        for train_idx, test_idx in LeaveOneOut().split(X_all, y_all):
            model.fit(X_all[train_idx], y_all[train_idx])
            held_out = X_all[test_idx]
            # Each fold holds out exactly one sample, so store plain scalars
            # rather than one-element arrays.
            y_true.append(y_all[test_idx][0])
            y_pred.append(model.predict(held_out)[0])
            y_prob.append(model.predict_proba(held_out)[0, 1])

        n_correct = sum(
            1 for truth, guess in zip(y_true, y_pred) if guess == truth)
        accuracy = n_correct / np.shape(X_all)[0]

        fpr, tpr, _ = roc_curve(y_true, y_prob)
        roc_auc = auc(fpr, tpr)
        precision, recall, _ = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)
        precision_ = precision_score(y_true, y_pred)
        recall_ = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)

        self._export_csv(
            pd.DataFrame({"fpr": fpr, "tpr": tpr}), "loocv_roc", model)
        self._export_csv(
            pd.DataFrame({"precision": precision, "recall": recall}),
            "loocv_pr", model)
        return (roc_auc, pr_auc, accuracy, precision_, recall_, f1,
                fpr, tpr, precision, recall)

    def skfcv_binary_task(self, model, k_fold=5):
        """Placeholder for stratified k-fold evaluation; currently a no-op.

        Returns:
            ``None`` -- the method is not implemented yet.
        """
        return None

    # NOTE: the list default is kept for signature compatibility; it is a
    # shared mutable object and must not be modified once this is implemented.
    def holdout_multi_task(self, model, test_size=0.2, classes_list=[0, 1, 2]):
        """Placeholder for multi-class hold-out evaluation; currently a no-op.

        Returns:
            ``None`` -- the method is not implemented yet.
        """
        return None
以上代码仅供参考,尚未成熟,并且只是整个GUI界面设计中的一个环节。后续会整合自动特征提取、特征选择、超参数自动寻优,以及材料领域的一些深度学习应用,如图卷积、电池SOC/SOH等时序数据预测。欢迎提出建议和意见。
更多内容,参考订阅号:数道