重拾机器学习1 KNN

import csv
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from matplotlib import pyplot as plt

# Path of the comma-separated ionosphere data file read below.
datapath = 'D:\\study\\python\\pythondatamining_study\\ionosphere.data'

# Alternative (disabled): join a base directory with an "Ionosphere" folder,
# e.g. D:\\study\\python\\pythondatamining_study\\Ionosphere\\ionosphere.data
# data_filename = os.path.join(datapath, "Ionosphere",
#                              "ionosphere.data")

# Collect rows in plain lists first so the array shapes follow the file
# contents instead of a hard-coded (351, 34): a longer file no longer raises
# IndexError and a shorter one no longer leaves silent all-zero rows.
feature_rows = []
labels = []

# newline='' is the csv module's recommended way to open a file for a reader.
with open(datapath, 'r', newline='') as input_file:
    for row in csv.reader(input_file):
        if not row:
            # csv.reader yields [] for a blank line (e.g. a trailing one).
            continue
        # Every column except the last is a numeric feature.
        feature_rows.append([float(datum) for datum in row[:-1]])
        # The last column is the class label; 'g' maps to True.
        labels.append(row[-1] == 'g')

# x: one row of float features per sample; y: matching boolean labels.
x = np.array(feature_rows, dtype='float')
y = np.array(labels, dtype='bool')

# Hold out part of the samples for testing; the fixed random_state makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=14,
)

# K-nearest-neighbours classifier with the library's default settings,
# trained on the training portion only.
estimator = KNeighborsClassifier()
estimator.fit(x_train, y_train)

# Accuracy = share of held-out samples whose predicted label equals the true
# label, expressed as a percentage.
y_predicted = estimator.predict(x_test)
accuracy1 = (y_test == y_predicted).mean() * 100
print('acc is {0:.1f}%'.format(accuracy1))

# 3-fold cross-validation over the whole data set, scored by ROC AUC.
# The original note observed that the default number of folds was 3 and was
# slated to become 5 in a later scikit-learn release; passing cv=3 explicitly
# keeps the result independent of that default.
scores = cross_val_score(estimator, x, y, scoring='roc_auc', cv=3)

# This assignment had been swallowed by the trailing comment on the line
# above, which left average_accuracy undefined and made the print below raise
# NameError. Despite its name, the value is the mean AUC across the folds,
# expressed as a percentage.
average_accuracy = np.mean(scores) * 100

# '{0:.1f}%'.format(...) renders the value with one decimal place.
print('average auc is {0:.1f}%'.format(average_accuracy))

# Sweep n_neighbors over 1..20 and record the 3-fold cross-validated accuracy
# obtained for each setting.
parameter_values = list(range(1, 21))
avg_scores, all_scores = [], []

for n in parameter_values:
    estimator = KNeighborsClassifier(n_neighbors=n)
    scores = cross_val_score(estimator, x, y, scoring='accuracy', cv=3)
    all_scores.append(scores)           # per-fold scores for this n
    avg_scores.append(scores.mean())    # mean accuracy across the folds

# Mean accuracy as a function of n_neighbors, drawn as a line with markers.
plt.plot(parameter_values, avg_scores, '-o')
plt.show()

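scoring 的常用可选参数如下(完整列表见 scikit-learn 文档的 "The scoring parameter" 一节):分类任务可用 'accuracy'、'balanced_accuracy'、'precision'、'recall'、'f1'、'roc_auc'、'average_precision'、'neg_log_loss';回归任务可用 'r2'、'neg_mean_squared_error'、'neg_mean_absolute_error'。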

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值