import numpy as np
from sklearn.model_selection import GroupKFold,KFold
def group_k_fold():
    """Demonstrate ``GroupKFold`` on a small hard-coded dataset.

    ``GroupKFold`` is a K-fold iterator variant with non-overlapping groups:
    the data is split according to each sample's group label, so the same
    group will not appear in two different folds (the number of distinct
    groups has to be at least equal to the number of folds). The folds are
    approximately balanced in the sense that the number of distinct groups
    is approximately the same in each fold.

    The function builds six samples belonging to three groups, splits them
    into three folds, and prints the splitter object followed by, for each
    fold, the train/test indices and the matching slices of ``X`` and ``y``.
    It takes no arguments and returns ``None``.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [10, 11]])
    y = np.array([1, 2, 3, 4, 5, 6])
    # One group label per sample; samples sharing a label stay in one fold.
    groups = np.array([0, 0, 2, 2, 3, 3])

    # n_splits: int, default = 5. Number of folds. Must be at least 2.
    group_kfold = GroupKFold(n_splits=3)
    # The result of this call is not used by the demo.
    group_kfold.get_n_splits(X, y, groups)
    print(group_kfold)

    for train_index, test_index in group_kfold.split(X, y, groups):
        print("TRAIN:", train_index, "TEST:", test_index)
        # Index arrays select the rows belonging to each side of the split.
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        print('\n X_train:', X_train, '\n X_test:', X_test, '\n y_train:', y_train, '\n y_test:', y_test)
        # Visual separator between consecutive folds.
        print('**** step ****')
def k_fold():
    """Demonstrate ``KFold`` on a six-element list.

    The data is divided into K (here 3) equal parts; in each of the K rounds
    one part is held out as the test set and the remaining parts form the
    training set. The train and test index arrays of every round are
    printed. Takes no arguments and returns ``None``.

    Note: ``KFold`` is created here without a ``shuffle`` argument, so the
    fold assignment follows scikit-learn's default for that option; this
    function does not itself randomize the data.
    """
    x = ['a', 'b', 'c', 'd', 'e', 'f']
    # Instantiate KFold first: 3 folds means the data is cut into thirds,
    # with one third used as the test set each round and the rest as the
    # training set.
    kf = KFold(n_splits=3)
    # split() partitions the original dataset, yielding index arrays.
    for train, test in kf.split(x):
        print(train, test)
# Run the GroupKFold demonstration; k_fold() is not invoked in the visible part of this file.
group_k_fold()
# kfold
# Latest recommended article, published 2024-04-18 09:00:07