StratifiedKFold,有时间再学学原理。

# import numpy as np
# from sklearn.model_selection import KFold,StratifiedKFold
# X=np.array([
#     [1,2,3,4],
#     [11,12,13,14],
#     [21,22,23,24],
#     [31,32,33,34],
#     [41,42,43,44],
#     [51,52,53,54],
#     [61,62,63,64],
#     [71,72,73,74]
# ])
#
# y=np.array([1,1,0,0,1,1,0,0])
# # 没有 n_folds 这个参数(引入的包不同): sklearn.model_selection 中对应的参数是 n_splits
# floder = KFold(n_splits=4,random_state=0,shuffle=False)
# sfolder = StratifiedKFold(n_splits=4,random_state=0,shuffle=False)
# print("---------------------StratifiedKFold---------------------------------")
# for train, test in sfolder.split(X,y):
#     print('Train: %s | test: %s' % (train, test))
#     print(" ")
# print("-----------------------------KFold------------------------------------")
# for train, test in floder.split(X,y):
#     print('Train: %s | test: %s' % (train, test))
#     print(" ")
#
# print("----------------------StratifiedKFold-----------------------------------")
#
# from sklearn.model_selection import StratifiedKFold
# X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [5, 6],[7, 8]])
# y = np.array([0, 0, 0, 1, 1, 1])  # 每个类别(0 和 1)的样本数都不能少于 n_splits(这里 n_splits=2)
# skf = StratifiedKFold(n_splits=2)
# # 结果有两行,和 n_splits 数相同
# for train_index, test_index in skf.split(X, y):
#    print("TRAIN:", train_index, "TEST:", test_index)
#
# print("-----------------------------------------------------------------------")
#
# # coding:utf-8
# import numpy as np
# from sklearn.model_selection import KFold,StratifiedKFold
#
# X=np.array([
#     ['a', 2, 122.21, 4],
#     ['b', 3, 132.12, 14],
#     ['c', 31, 155.33, 24],
#     ['d', 12, 143.93, 34],
#     ['c', 32, 124.31, 44],
#     ['a', 1, 151.11, 54],
#     ['b', 11, 112.33, 64],
#     ['b', 21, 137.82, 74]
# ])
#
# y=np.array([1,1,0,0,1,1,0,0])
# sfolder = StratifiedKFold(n_splits=3,random_state=0,shuffle=False)
# floder = KFold(n_splits=3,random_state=0,shuffle=False)
#
# for train_index, test_index in sfolder.split(X, y):
#     print("train_index:", train_index, "test_index:", test_index)
#     y_train, y_test = y[train_index], y[test_index]
#     X_train, X_test = X[train_index], X[test_index]
#
#
#     print("X_train:", X_train, "y_train:", y_train)
#     print("X_test:", X_test, "y_test", y_test)
#
#
#
#
#
#
#  # Stratified K-Folds cross-validator
#  #
#  #    Provides train/test indices to split data in train/test sets.
#  #
#  #    This cross-validation object is a variation of KFold that returns
#  #    stratified folds. The folds are made by preserving the percentage of
#  #    samples for each class.
#  #
#  #    Read more in the :ref:`User Guide <cross_validation>`.
#  #
#  #    Parameters
#  #    ----------
#  #    n_splits : int, default=3
#  #        Number of folds. Must be at least 2.
#  #
#  #    shuffle : boolean, optional
#  #        Whether to shuffle each stratification of the data before splitting
#  #        into batches.
#  #
#  #    random_state : int, RandomState instance or None, optional, default=None
#  #        If int, random_state is the seed used by the random number generator;
#  #        If RandomState instance, random_state is the random number generator;
#  #        If None, the random number generator is the RandomState instance used
#  #        by `np.random`. Used when ``shuffle`` == True.
#
# """
# from sklearn.model_selection import StratifiedKFold
# X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
# y = np.array([0, 0, 1, 1])
# skf = StratifiedKFold(n_splits=2)
# temp = skf.get_n_splits(X, y)
# print("temp:", temp)
# print(skf)  # doctest: +NORMALIZE_WHITESPACE
# print("-------------------------------------------------------")
# StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
# for train_index, test_index in skf.split(X, y):
#     print("train_index:", train_index, "test_index:", test_index)
#     y_train, y_test = y[train_index], y[test_index]
#     X_train, X_test = X[train_index], X[test_index]
#
#
#     print("X_train:", X_train, "y_train:", y_train)
#     print("X_test:", X_test, "y_test", y_test)
# """
#
# import numpy as np
# from sklearn.model_selection import KFold, StratifiedKFold
#
# X = np.array([
#     [1, 2, 3, 4],
#     [11, 12, 13, 14],
#     [21, 22, 23, 24],
#     [31, 32, 33, 34],
#     [41, 42, 43, 44],
#     [51, 52, 53, 54],
#     [61, 62, 63, 64],
#     [71, 72, 73, 74]
# ])
#
# y = np.array([1, 1, 0, 0, 1, 1, 0, 0])
# sfolder = StratifiedKFold(n_splits=2, random_state=42, shuffle=True)
# print("-------------------tt----------------------------")
# count = 0
#
# for train, test in sfolder.split(X, y):
#     count += 1
#     print("count:", count)
#     print("train_index:", train_index, "test_index:", test_index)
#     y_train, y_test = y[train_index], y[test_index]
#     X_train, X_test = X[train_index], X[test_index]
#
#     print("X_train:", X_train, "y_train:", y_train)
#     print("X_test:", X_test, "y_test", y_test)
#     break
#



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值