# Splitting a DataFrame dataset into train / test / validation sets (DataFrame 数据集切分)
# Method 1: two-stage split using scikit-learn.
from sklearn.model_selection import train_test_split
# Stage 1: hold out 20% of `data`; the remaining 80% is the training set.
# A fixed random_state makes the shuffle reproducible.
data_train, data_test = train_test_split(data, random_state=1234, test_size=0.2)
# Stage 2: cut the 20% hold-out in half, giving roughly 10% of the original
# rows to the test set and 10% to the validation set.
data_test, data_val = train_test_split(data_test, random_state=1234, test_size=0.5)
# Method 2: random boolean-mask split using NumPy.
# Relies on `data`, `train_test_ratio` and `test_valid_ratio` being defined
# before this point. As used here, `train_test_ratio` is the fraction of all
# rows drawn for the held-out set, and `test_valid_ratio` is the fraction of
# that held-out set that becomes the validation set.
# NOTE(review): no seed is set in this snippet, so the split presumably
# differs between runs unless np.random is seeded elsewhere -- confirm.

# Stage 1: pick the held-out rows; everything else is the training set.
n_ratings = data.shape[0]
# replace=False samples without replacement, so no row index is drawn twice.
test = np.random.choice(
    n_ratings, size=int(train_test_ratio * n_ratings), replace=False
)
test_idx = np.zeros(n_ratings, dtype=bool)
test_idx[test] = True
tp_test = data[test_idx]
tp_train = data[~test_idx]

# Stage 2: carve the validation set out of the held-out rows.
n_ratings = tp_test.shape[0]
test_valid = np.random.choice(
    n_ratings, size=int(test_valid_ratio * n_ratings), replace=False
)
valid_idx = np.zeros(n_ratings, dtype=bool)
valid_idx[test_valid] = True
tp_valid = tp_test[valid_idx]
# The non-validation remainder is the final test set. It must be assigned to
# tp_test (not tp_train): tp_train has to keep the training rows selected in
# stage 1, and tp_test must no longer overlap with tp_valid.
tp_test = tp_test[~valid_idx]