# import numpy as np
# from sklearn.model_selection import KFold,StratifiedKFold
# X=np.array([
# [1,2,3,4],
# [11,12,13,14],
# [21,22,23,24],
# [31,32,33,34],
# [41,42,43,44],
# [51,52,53,54],
# [61,62,63,64],
# [71,72,73,74]
# ])
#
# y=np.array([1,1,0,0,1,1,0,0])
# # 注意:sklearn.model_selection 里的 KFold/StratifiedKFold 没有 n_folds 参数(那是旧版 sklearn.cross_validation 的写法),折数要用 n_splits 指定。
# # shuffle=False 时 random_state 不起作用,scikit-learn 0.24 及以后的版本会因此抛出 ValueError,所以这里不传 random_state。
# floder = KFold(n_splits=4, shuffle=False)
# sfolder = StratifiedKFold(n_splits=4, shuffle=False)
# print("---------------------StratifiedKFold---------------------------------")
# for train, test in sfolder.split(X,y):
# print('Train: %s | test: %s' % (train, test))
# print(" ")
# print("-----------------------------KFold------------------------------------")
# for train, test in floder.split(X,y):
# print('Train: %s | test: %s' % (train, test))
# print(" ")
#
# print("----------------------StratifiedKFold-----------------------------------")
#
# from sklearn.model_selection import StratifiedKFold
# X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [5, 6],[7, 8]])
# y = np.array([0, 0, 0, 1, 1, 1])  # 每个类别(0 和 1)的样本数都不能少于 n_splits(这里 n_splits=2)
# skf = StratifiedKFold(n_splits=2)
# # 结果有两行,和 n_splits 数相同
# for train_index, test_index in skf.split(X, y):
# print("TRAIN:", train_index, "TEST:", test_index)
#
# print("-----------------------------------------------------------------------")
#
# # coding:utf-8
# import numpy as np
# from sklearn.model_selection import KFold,StratifiedKFold
#
# X=np.array([
# ['a', 2, 122.21, 4],
# ['b', 3, 132.12, 14],
# ['c', 31, 155.33, 24],
# ['d', 12, 143.93, 34],
# ['c', 32, 124.31, 44],
# ['a', 1, 151.11, 54],
# ['b', 11, 112.33, 64],
# ['b', 21, 137.82, 74]
# ])
#
# y=np.array([1,1,0,0,1,1,0,0])
# # shuffle=False 时 random_state 不起作用,scikit-learn 0.24 及以后的版本会因此抛出 ValueError,所以这里不传 random_state。
# sfolder = StratifiedKFold(n_splits=3, shuffle=False)
# floder = KFold(n_splits=3, shuffle=False)
#
# for train_index, test_index in sfolder.split(X, y):
# print("train_index:", train_index, "test_index:", test_index)
# y_train, y_test = y[train_index], y[test_index]
# X_train, X_test = X[train_index], X[test_index]
#
#
# print("X_train:", X_train, "y_train:", y_train)
# print("X_test:", X_test, "y_test", y_test)
#
#
#
#
#
#
# # Stratified K-Folds cross-validator
# #
# # Provides train/test indices to split data in train/test sets.
# #
# # This cross-validation object is a variation of KFold that returns
# # stratified folds. The folds are made by preserving the percentage of
# # samples for each class.
# #
# # Read more in the :ref:`User Guide <cross_validation>`.
# #
# # Parameters
# # ----------
# # n_splits : int, default=3
# # Number of folds. Must be at least 2.
# #
# # shuffle : boolean, optional
# # Whether to shuffle each stratification of the data before splitting
# # into batches.
# #
# # random_state : int, RandomState instance or None, optional, default=None
# # If int, random_state is the seed used by the random number generator;
# # If RandomState instance, random_state is the random number generator;
# # If None, the random number generator is the RandomState instance used
# # by `np.random`. Used when ``shuffle`` == True.
#
# """
# from sklearn.model_selection import StratifiedKFold
# X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
# y = np.array([0, 0, 1, 1])
# skf = StratifiedKFold(n_splits=2)
# temp = skf.get_n_splits(X, y)
# print("temp:", temp)
# print(skf) # doctest: +NORMALIZE_WHITESPACE
# print("-------------------------------------------------------")
# StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
# for train_index, test_index in skf.split(X, y):
# print("train_index:", train_index, "test_index:", test_index)
# y_train, y_test = y[train_index], y[test_index]
# X_train, X_test = X[train_index], X[test_index]
#
#
# print("X_train:", X_train, "y_train:", y_train)
# print("X_test:", X_test, "y_test", y_test)
# """
#
# import numpy as np
# from sklearn.model_selection import KFold, StratifiedKFold
#
# X = np.array([
# [1, 2, 3, 4],
# [11, 12, 13, 14],
# [21, 22, 23, 24],
# [31, 32, 33, 34],
# [41, 42, 43, 44],
# [51, 52, 53, 54],
# [61, 62, 63, 64],
# [71, 72, 73, 74]
# ])
#
# y = np.array([1, 1, 0, 0, 1, 1, 0, 0])
# sfolder = StratifiedKFold(n_splits=2, random_state=42, shuffle=True)
# print("-------------------tt----------------------------")
# count = 0
#
# for train_index, test_index in sfolder.split(X, y):
# count += 1
# print("count:", count)
# print("train_index:", train_index, "test_index:", test_index)
# y_train, y_test = y[train_index], y[test_index]
# X_train, X_test = X[train_index], X[test_index]
#
# print("X_train:", X_train, "y_train:", y_train)
# print("X_test:", X_test, "y_test", y_test)
# break
#
StratifiedKFold,有时间再学学原理。
最新推荐文章于 2024-06-03 13:29:05 发布