比较10折交叉验证法和留一法
加载函数包和数据集
from sklearn.model_selection import KFold, LeaveOneOut  # subset-splitting strategies for cross-validation
# Fixed: `sklearn.linear_model.logistic` is a private module path that was
# deprecated in scikit-learn 0.22 and removed in 0.24; import from the
# public package instead.
from sklearn.linear_model import LogisticRegression
from sklearn import datasets  # built-in example datasets
import numpy as np

iris = datasets.load_iris()  # load the 150-sample, 3-class iris dataset
print('样本集大小:', iris.data.shape, iris.target.shape)
定义交叉验证函数
def partition(k, X, y, Model, Method):
    """Run cross-validation and return per-fold train/test accuracies.

    Parameters
    ----------
    k : int
        Number of folds for 'KFold' (ignored by 'LeaveOneOut', which
        always produces one fold per sample).
    X : ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : ndarray of shape (n_samples,)
        Target labels.
    Model : str
        Model to fit; only "Logit" (logistic regression) is supported.
    Method : str
        Split strategy: 'KFold' or 'LeaveOneOut'.

    Returns
    -------
    tuple of (list, list)
        Training accuracy and test accuracy for each fold.

    Raises
    ------
    ValueError
        If `Method` or `Model` is not a supported name.  (Previously an
        unknown value left `m`/`clf` unbound and crashed later with an
        unrelated NameError.)
    """
    # Seed NumPy's global RNG so KFold's shuffle is reproducible.
    np.random.seed(1)

    # Choose the splitter; fail fast on an unknown method.
    if Method == 'KFold':            # k-fold cross-validation (e.g. k=10)
        splitter = KFold(n_splits=k, shuffle=True)
    elif Method == 'LeaveOneOut':    # leave-one-out: one test sample per fold
        splitter = LeaveOneOut()
    else:
        raise ValueError(f"Unknown Method: {Method!r}")
    if Model != "Logit":
        raise ValueError(f"Unknown Model: {Model!r}")

    # Accumulate per-fold accuracies; appending avoids the original
    # preallocated-list + manual index bookkeeping.
    train_accuracy = []
    test_accuracy = []
    for train_index, test_index in splitter.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf = LogisticRegression(random_state=0)
        clf.fit(X_train, y_train)
        # Fraction of correct predictions on each partition.
        train_accuracy.append(np.mean(clf.predict(X_train) == y_train))
        test_accuracy.append(np.mean(clf.predict(X_test) == y_test))
    return train_accuracy, test_accuracy
进行对率回归
10折交叉验证法
# 10-fold cross-validation on the iris data with logistic regression.
train_acc,test_acc = partition(10,iris.data,iris.target,"Logit",'KFold')
# Mean train/test accuracy averaged over the 10 folds.
print(np.mean(train_acc),np.mean(test_acc))
输出：0.9607407407407406 0.9466666666666667
留一法
# Leave-one-out: one fold per sample (k is ignored by LeaveOneOut,
# so passing len(iris.target) only documents the fold count).
train_acc,test_acc = partition(len(iris.target),iris.data,iris.target,"Logit",'LeaveOneOut')
# Mean train/test accuracy averaged over all 150 leave-one-out folds.
print(np.mean(train_acc),np.mean(test_acc))
输出：0.9626845637583892 0.9533333333333334