import numpy as np
from sklearn.model_selection import StratifiedKFold
import os
def generate_data(random_state=1, K=5) -> None:
    """Write K stratified train/dev splits of the training data to disk.

    For each fold index ``i`` produced by ``StratifiedKFold.split(X, y)``,
    the directory ``./data_StratifiedKFold_<random_state>/data_origin_<i>/``
    is created (if needed) and three files are written into it:

    * ``train.csv`` -- rows of ``train_df`` selected by the fold's train indices
    * ``dev.csv``   -- rows of ``train_df`` selected by the fold's dev indices
    * ``test.csv``  -- a full copy of ``test_df`` (identical in every fold)

    The function reads the module-level names ``X``, ``y``, ``train_df`` and
    ``test_df``; they are not parameters and are not defined here.
    NOTE(review): presumably ``X``/``y`` are the features/labels aligned
    row-for-row with ``train_df``, and ``train_df``/``test_df`` are pandas
    DataFrames -- confirm against where they are defined.

    Args:
        random_state: Seed forwarded to ``StratifiedKFold``; it is also
            embedded in the name of the output directory.
        K: Number of folds (``n_splits``).
    """
    skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=random_state)

    for fold_idx, (train_index, dev_index) in enumerate(skf.split(X, y)):
        print(fold_idx, "TRAIN:", train_index, "TEST:", dev_index)

        data_dir = "./data_StratifiedKFold_{}/data_origin_{}/".format(
            random_state, fold_idx
        )
        # exist_ok=True avoids the race between checking for the directory
        # and creating it, and makes reruns with the same seed harmless.
        os.makedirs(data_dir, exist_ok=True)

        fold_train_df = train_df.iloc[train_index]
        fold_dev_df = train_df.iloc[dev_index]

        # Each fold directory is self-contained, so the (unchanged) test set
        # is copied alongside the fold-specific train/dev files.
        test_df.to_csv(os.path.join(data_dir, "test.csv"))
        fold_train_df.to_csv(os.path.join(data_dir, "train.csv"))
        fold_dev_df.to_csv(os.path.join(data_dir, "dev.csv"))

        print(fold_train_df.shape, fold_dev_df.shape)