交叉验证步骤
- 首先将样本数据分为训练数据及测试数据
- 将训练数据分为K份
- 每次从K份数据中选出1份做验证数据集,其余K-1份做训练数据,如此轮换K次
- 将K次验证得到的模型评分取平均,作为该组参数的得分
交叉验证一般用来调参。
sklearn实现交叉验证
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
# Load the digits dataset and hold out a test split that is never seen
# during the hyperparameter search.
digits = load_digits()
x = digits.data
y = digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y)

# Grid-search n_neighbors (1..10) and the Minkowski exponent p (1..5),
# scoring each combination by its mean cross-validation score on the
# training split only.
best_score, best_n, best_k = 0.0, 0, 0
for n in range(1, 11):
    for k in range(1, 6):
        knn = KNeighborsClassifier(weights='distance', n_neighbors=n, p=k)
        # Cross-validation (cross_val_score's default fold settings).
        score = np.mean(cross_val_score(knn, x_train, y_train))
        if score > best_score:
            # Remember the parameters together with the score; the loop
            # variables themselves end at n=10, k=5 no matter which
            # combination actually scored best.
            best_score, best_n, best_k = score, n, k

print(best_k)
print(best_n)
print(best_score)

# Refit with the winning parameters and the same weighting scheme that was
# used during the search, then report accuracy on the held-out test split.
knn = KNeighborsClassifier(weights='distance', n_neighbors=best_n, p=best_k)
knn.fit(x_train, y_train)
score = knn.score(x_test, y_test)
print(score)
输出
5
10
0.985895635412364
0.9844444444444445