| | StratifiedShuffleSplit | StratifiedKFold |
| --- | --- | --- |
训练集、测试集划分 | 通过指定 test_size 或 train_size | 通过指定 n_splits |
是否支持shuffle | 是 | 是 |
区别 | 保证每组训练集(或测试集)的数据分布一致,但不一定与原始数据分布一致 | 保证训练集(或测试集)与原始数据的分布一致。 |
from sklearn.model_selection import StratifiedShuffleSplit,StratifiedKFold
import numpy as np
# Toy feature matrix: a 10-row pattern (four [1, 2]/[3, 4] pairs followed by
# two extra [3, 4] rows) repeated five times, giving 50 two-feature samples.
X = np.array(([[1, 2], [3, 4]] * 4 + [[3, 4]] * 2) * 5)
# Binary class labels: a 10-label pattern (six 0s, four 1s) repeated five
# times, one label per row of X.
y = np.array([0, 0, 0, 0, 1, 1, 0, 0, 1, 1] * 5)
# Draw 5 independent stratified random train/test splits of (X, y).
# NOTE(review): the size argument was truncated to `tra` in the original,
# which is a syntax error. train_size=0.7 is chosen because the sample output
# recorded below this snippet shows 35 training and 15 test labels per split;
# the original comment mentioned a 0.25 test ratio -- confirm the intent.
ss = StratifiedShuffleSplit(n_splits=5, train_size=0.7, random_state=0)
for train_index, test_index in ss.split(X, y):
    # Class labels selected for the training and test part of this split.
    y_train, y_test = y[train_index], y[test_index]
    print(y_train, y_test)
    # Blank separator between splits (the original printed the literal "/n").
    print('\n')
[1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1]
[1 0 0 0 0 0 0 1 1 0 1 1 0 1 0]
[0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0]
[0 1 1 1 1 1 0 1 0 0 0 0 0 0 0]
[1 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1]
[0 0 1 0 1 0 0 0 0 1 0 1 1 1 0]
[1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0]
[1 0 0 1 0 0 0 0 0 1 1 0 1 0 1]
[1 0 1 1 0 1 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0]
[0 1 1 0 0 1 0 0 0 0 1 0 1 0 1]
# 5-fold stratified cross-validation over (X, y) with shuffling enabled.
# random_state is fixed so the shuffled folds are reproducible between runs,
# matching the seeded StratifiedShuffleSplit example earlier in the file.
sk = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, valid_idx in sk.split(X, y):
    # Class labels of the training folds and of the held-out fold.
    y_train, y_test = y[train_idx], y[valid_idx]
    print(y_train, y_test)
    # Blank separator between folds (the original printed the literal "/n").
    print('\n')
[0 0 0 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 1]
[0 1 0 0 0 1 0 1 1 0]
[0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 1]
[0 1 0 0 1 1 0 0 0 1]
[0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1]
[0 0 1 1 0 0 0 1 1 0]
[0 0 1 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 1 1]
[0 0 1 1 0 1 0 0 1 0]
[0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1]
[0 1 0 1 0 0 0 0 1 1]