from sklearn import datasets#引入数据集,sklearn包含众多数据集 from sklearn.model_selection import train_test_split#将数据分为测试集和训练集 from sklearn.neighbors import KNeighborsClassifier#利用邻近点方式训练数据
from sklearn.model_selection import train_test_split
# make_classification is imported from the public sklearn.datasets package:
# the private sklearn.datasets.samples_generator module was deprecated in
# scikit-learn 0.22 and removed in 0.24, while this path works on both.
from sklearn.datasets import make_classification
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# --- Load the data ---
iris = datasets.load_iris()
X = iris.data
y = iris.target

# --- Sweep n_neighbors over 1..30 and plot the cross-validated accuracy ---
k_range = range(1, 31)
k_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # 10-fold cross-validation; 'accuracy' is the scoring used for classification.
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    # Keep one number per k: the mean accuracy over the folds.
    k_score.append(scores.mean())

plt.figure()
plt.plot(k_range, k_score)
plt.xlabel('Value of k for KNN')
plt.ylabel('CrossValidation accuracy')
plt.show()
# A very small k tends to overfit (the model follows noise), while a very
# large k over-smooths the decision boundary and underfits. The original
# author's note suggests picking k between 12 and 18 based on this curve.
# Loss-based variant of the k sweep (mean squared error instead of accuracy).
# Relies on k_range, X and y already being defined earlier in the script.
# Start from a fresh list so the losses are not appended onto accuracy values
# left in k_score by an earlier sweep (which would also leave k_score twice
# as long as k_range).
k_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # The scorer returns the *negated* MSE (higher is better), so the sign is
    # flipped back to obtain a positive loss per fold.
    loss = -cross_val_score(knn, X, y, cv=10, scoring='neg_mean_squared_error')  # for regression
    # Keep one number per k: the mean loss over the folds.
    k_score.append(loss.mean())
from sklearn.model_selection import learning_curve from sklearn.datasets import load_digits from sklearn.svm import SVC import matplotlib.pyplot as plt import numpy as np
# validation_curve is used here in place of learning_curve.
from sklearn.model_selection import validation_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# Load the digits dataset and split it into its feature matrix and labels.
digits = load_digits()
X, y = digits.data, digits.target