# Step 1) Sample preparation
# -----------------------------------------------------
import pandas as pd
from sklearn.model_selection import train_test_split

# 1-1) Load the sample set from a CSV file.
# `args` is defined outside this section; the file read is args.input with
# the ".csv" suffix appended.
strModelIdx = "allClass"
netFlow_data = pd.read_csv(filepath_or_buffer=args.input + '.csv')
print("Sample set size:", netFlow_data.shape)

# 1-2) Separate the feature columns from the label column.
# The features are columns F00-F04 followed by F20-F39 (25 columns in all);
# the names are generated from those two ranges instead of being typed out.
feature_columns = [
    "F{:02d}".format(idx)
    for idx in list(range(0, 5)) + list(range(20, 40))
]
X = netFlow_data[feature_columns]
# Double brackets keep `y` as a one-column DataFrame rather than a Series.
y = netFlow_data[['CLASS']]
# print(X, y)

# 1-3) Split into a training set and a test set.
# random_state=1 fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print("Train set size, Test set size:", X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# =======================================================
# How to convert to numpy:
# to_numpy() returns a new array and leaves y_train untouched, so the result
# has to be bound to a name to be usable afterwards.
y_train_np = y_train.to_numpy()
# [python] [codelet] Read the sample set from a CSV file and split it, ready for training
# Latest recommended article published on 2022-04-09 19:52:12