我们选择使用以下几种模型进行建置,并比较模型的分类效能。
首先将原始训练数据划分为训练集和验证集,其中训练集用于训练模型,验证集用于验证模型效果。划分之前,先导入建模所需的库:
# 建模
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
# 预处理
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# 模型评估
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, roc_auc_score# 划分特征和标签
# Separate the target label from the feature matrix: the label is the
# 'Response' column, and the features are every column of `train` except
# 'id' and 'Response'.
y = train['Response']
X = train.drop(columns=['id', 'Response'])
# 划分训练集和验证集(分层抽样)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, rand