"""K-nearest-neighbours experiments on the ``ionosphere.data`` CSV file.

Running this file as a script:

1. loads the data set (numeric feature columns plus a label in the last
   column),
2. prints the accuracy of a default ``KNeighborsClassifier`` on a hold-out
   split,
3. prints the 3-fold cross-validated ROC AUC of the same estimator,
4. plots the 3-fold cross-validated accuracy for ``n_neighbors`` = 1..20.
"""
import csv
import os

import numpy as np

# Location of the raw data file.
datapath = 'D:\\study\\python\\pythondatamining_study\\ionosphere.data'
# NOTE: if the file lives in an "Ionosphere" sub-folder instead
# (D:\\study\\python\\pythondatamining_study\\Ionosphere\\ionosphere.data),
# build the path with os.path.join(<base dir>, "Ionosphere", "ionosphere.data").


def load_dataset(path):
    """Read a label-last CSV file into a feature matrix and a label vector.

    Every non-blank row must hold the numeric features followed by the class
    label in its last column. A label equal to ``'g'`` becomes ``True``; any
    other label becomes ``False``.

    The arrays are sized from the rows actually read, so the loader does not
    depend on the file having a fixed number of samples or features.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float array of shape
        ``(n_samples, n_features)`` and ``y`` is a bool array of shape
        ``(n_samples,)``.

    Raises:
        ValueError: If the file contains no data rows, a feature cannot be
            parsed as a float, or the rows have differing lengths.
    """
    features = []
    labels = []
    # newline='' is what the csv module asks for so that it can handle line
    # endings (including those embedded in quoted fields) itself.
    with open(path, 'r', newline='') as input_file:
        for row in csv.reader(input_file):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            features.append([float(datum) for datum in row[:-1]])
            labels.append(row[-1] == 'g')

    if not features:
        raise ValueError('no data rows found in {0!r}'.format(path))

    # With an explicit dtype, ragged rows raise ValueError instead of
    # silently producing an object array.
    x = np.array(features, dtype='float')
    y = np.array(labels, dtype='bool')
    return x, y


def main():
    """Run the hold-out, cross-validation and ``n_neighbors`` sweep experiments."""
    # The modelling/plotting packages are imported here so that the loader
    # above stays importable when only numpy is available.
    from matplotlib import pyplot as plt
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    x, y = load_dataset(datapath)

    # 1) Accuracy on a single hold-out split (fixed seed for repeatability).
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=14)
    estimator = KNeighborsClassifier()
    estimator.fit(x_train, y_train)
    y_predicted = estimator.predict(x_test)
    accuracy1 = np.mean(y_test == y_predicted) * 100
    print('acc is {0:.1f}%'.format(accuracy1))

    # 2) Cross-validated ROC AUC. cv is pinned to 3 because the library
    #    default number of folds moves from 3 to 5 in newer scikit-learn
    #    releases.
    scores = cross_val_score(estimator, x, y, scoring='roc_auc', cv=3)
    average_auc = np.mean(scores) * 100
    # '{0:.1f}%'.format renders the value with one decimal place.
    print('average auc is {0:.1f}%'.format(average_auc))

    # 3) Mean cross-validated accuracy for each candidate n_neighbors.
    parameter_values = list(range(1, 21))
    avg_scores = []
    for n in parameter_values:
        estimator = KNeighborsClassifier(n_neighbors=n)
        scores = cross_val_score(estimator, x, y, scoring='accuracy', cv=3)
        avg_scores.append(np.mean(scores))
    plt.plot(parameter_values, avg_scores, '-o')
    plt.show()


if __name__ == '__main__':
    main()
# The available values for the `scoring` parameter are listed below: