Confusion Matrix
import pandas as pd import numpy as np df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split X = df.loc[:, 2:].values y = df.loc[:, 1].values le = LabelEncoder() y = le.fit_transform(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=1) from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.model_selection import cross_val_score pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1)) from sklearn.metrics import confusion_matrix pipe_svc.fit(X_train, y_train) y_pred = pipe_svc.predict(X_test) confmat = confusion_matrix(y_true=y_test, y_pred=y_pred) print(confmat)
如果想要改变位置
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=[1, 0])
import pandas as pd import numpy as np df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split X = df.loc[:, 2:].values y = df.loc[:, 1].values le = LabelEncoder() y = le.fit_transform(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=1) from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.model_selection import cross_val_score pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1)) from sklearn.metrics import confusion_matrix pipe_svc.fit(X_train, y_train) y_pred = pipe_svc.predict(X_test) confmat = confusion_matrix(y_true=y_test, y_pred=y_pred) print(confmat) import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(2.5, 2.5)) ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3) for i in range(confmat.shape[0]): for j in range(confmat.shape[1]): ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center') plt.xlabel('Predicted label') plt.ylabel('True label') plt.tight_layout() plt.show()
Precision/Recall/F1
import pandas as pd import numpy as np df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split X = df.loc[:, 2:].values y = df.loc[:, 1].values le = LabelEncoder() y = le.fit_transform(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=1) from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.model_selection import cross_val_score pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1)) from sklearn.metrics import confusion_matrix pipe_svc.fit(X_train, y_train) y_pred = pipe_svc.predict(X_test) from sklearn.metrics import precision_score, recall_score, f1_score print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred)) print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred)) print('F1: %.3f' % f1_score(y_true=y_test, y_pred=y_pred))
Scoring Metric(Precision/Recall/F1) in GridSearchCV
import pandas as pd import numpy as np df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import train_test_split X = df.loc[:, 2:].values y = df.loc[:, 1].values le = LabelEncoder() y = le.fit_transform(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=1) from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.model_selection import cross_val_score pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1)) from sklearn.metrics import confusion_matrix pipe_svc.fit(X_train, y_train) from sklearn.metrics import make_scorer from sklearn.metrics import precision_score, recall_score, f1_score scorer = make_scorer(f1_score, pos_label=0) c_gamma_range = [0.01, 0.1, 1.0, 10.0] param_grid = [{'svc__C': c_gamma_range, 'svc__kernel': ['linear']}, {'svc__C': c_gamma_range, 'svc__gamma': c_gamma_range, 'svc__kernel': ['rbf']}] gs = GridSearchCV(estimator=pipe_svc, param_grid=param_grid, scoring=scorer, cv=10, n_jobs=-1) gs = gs.fit(X_train, y_train) print(gs.best_score_) print(gs.best_params_)