参考了han同学的答案,数据集也可在han同学的github上下载。
3.4 选择两个 UCI 数据集,比较 10 折交叉验证法和留一法所估计出的对率回归的错误率.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
# Compare 10-fold cross-validation with leave-one-out (LOO) for estimating
# the accuracy of logistic regression on the dataset stored at `data_path`.
data_path = r'Transfusion.txt'
# Load the comma-separated file and cast every value to an integer.
data = np.loadtxt(data_path, delimiter=',').astype(int)
# The first four columns are used as features, the fifth column as the label.
X = data[:, :4]
y = data[:, 4]
# Number of samples (m) and number of features (n).
m, n = X.shape
# Standardize each feature column to zero mean and unit variance.
# The mean must be taken per column (axis 0) to match the per-column std;
# a global scalar mean would leave the columns un-centred.
col_std = X.std(0)
# A constant column has zero std; divide by 1 so it maps to 0 instead of NaN.
col_std[col_std == 0] = 1
X = (X - X.mean(0)) / col_std
# 10-fold cross-validation (consecutive folds, no shuffling).
kfold = KFold(n_splits=10)
lr = LogisticRegression(C=2)
# `score` holds one value per fold, as returned by cross_val_score.
score = cross_val_score(lr, X, y, cv=kfold)
print('acc of k-10: {}'.format(score.mean()))
# Leave-one-out: every sample serves as the test set exactly once.
# LeaveOneOut() is equivalent to KFold(n_splits=m), where m is the number of
# samples.
loocv = LeaveOneOut()
score = cross_val_score(lr, X, y, cv=loocv)
print('acc of loo: {}'.format(score.mean()))