参考链接:
https://blog.csdn.net/koukehui0292/article/details/83060788
https://www.jianshu.com/p/bb7f3d51d7f0
N折交叉验证
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn import svm
# Toy data set: six 2-D points, each labelled 1 or 2.
x = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1], [2, 3], [1, 4]])
y = np.array([1, 1, 2, 2, 2, 1])
model = svm.SVC()

# Score the (unfitted) model with cross-validation.
#   scoring: metric name, e.g. 'accuracy', 'precision_weighted',
#            'recall_weighted' or 'f1_weighted'
#   cv:      number of folds
#   n_jobs:  number of CPUs working in parallel (-1 means all of them)
acc = cross_val_score(
    estimator=model,
    X=x,
    y=y,
    scoring='accuracy',
    cv=2,
    n_jobs=-1,
)
print('acc:', acc)

# Train on the full data set, then classify a single unseen point.
data = [[5.8, -1]]
model.fit(x, y)
pred = model.predict(data)
print(pred)
各种交叉验证方法
import numpy as np
from sklearn import svm
# Toy data set: four samples, each with its own label.
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
model = svm.SVC()
total = []  # accuracy of each fold, in split order

# 1. K-fold
from sklearn.model_selection import KFold
kf = KFold(n_splits=2)
kf.get_n_splits(X)  # number of splits; the return value is not used here
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Train on this fold's training split, then predict its held-out split.
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    # Fold accuracy: fraction of held-out samples predicted correctly.
    score = np.mean(y_test == pred)
    total.append(score)

# Overall accuracy is the mean of the per-fold accuracies, so it is
# computed once, after every fold has been scored.
acc = np.mean(total)
print('acc:', acc)
# The sections below are alternative splitters, all left commented out.
# Each one only prints the train/test indices and slices X and y; none of
# them trains or scores the model.

# # 2. Repeated K-fold
# from sklearn.model_selection import RepeatedKFold
# rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)
# for train_index, test_index in rkf.split(X):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

# # 3. Leave-one-out cross-validation (LOO)
# from sklearn.model_selection import LeaveOneOut
# loo = LeaveOneOut()
# loo.get_n_splits(X)
# for train_index, test_index in loo.split(X):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

# # 4. Leave-P-out cross-validation (LPO), constructed here with 2
# from sklearn.model_selection import LeavePOut
# lpo = LeavePOut(2)
# lpo.get_n_splits(X)
# for train_index, test_index in lpo.split(X):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

# # 5. Random-permutation (shuffle-split) cross-validation
# from sklearn.model_selection import ShuffleSplit
# rs = ShuffleSplit(n_splits=3, test_size=0.25, random_state=0)
# rs.get_n_splits(X)
# for train_index, test_index in rs.split(X):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]