# Method 1 (方法一): LightGBM native training API (lgb.train) with 5-fold cross-validation
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
# Train a binary LightGBM model with 5-fold cross-validation and print the
# fold-averaged class predictions for the test set.

# Load the training and test tables from the working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Separate the features from the target column.
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']
# NOTE(review): assumes test.csv contains exactly the feature columns of
# X_train (no 'label' column) -- confirm against the data.
X_test = test_data

params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05,
}

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# One array of test-set probabilities per fold model.
test_predictions = []
for train_index, val_index in kfold.split(X_train):
    X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    train_fold_dataset = lgb.Dataset(X_train_fold, label=y_train_fold)
    val_fold_dataset = lgb.Dataset(X_val_fold, label=y_val_fold)

    # Early stopping and periodic logging are passed as callbacks: the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments were removed
    # from lgb.train in LightGBM 4.0. Requires LightGBM >= 3.3.
    model = lgb.train(
        params,
        train_fold_dataset,
        num_boost_round=100,
        valid_sets=[val_fold_dataset],
        callbacks=[
            lgb.early_stopping(stopping_rounds=10),
            lgb.log_evaluation(period=10),
        ],
    )

    # Predict the *test* set with this fold's model. The validation fold is
    # only used for early stopping; validation predictions from different
    # folds cover different rows and cannot be averaged together.
    fold_test_predictions = model.predict(X_test, num_iteration=model.best_iteration)
    test_predictions.append(fold_test_predictions)

# Average the per-fold probabilities, then threshold at 0.5 to get 0/1 labels.
final_predictions = sum(test_predictions) / len(test_predictions)
final_predictions = (final_predictions > 0.5).astype(int)

y_test_predictions = final_predictions
print(y_test_predictions)
# Method 2 (方法二): LightGBM scikit-learn API (LGBMClassifier) with 5-fold cross-validation
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
# Train an LGBMClassifier with 5-fold cross-validation and print the
# fold-averaged class predictions for the test set.

# Load the training and test tables from the working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Separate the features from the target column.
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']
# NOTE(review): assumes test.csv contains exactly the feature columns of
# X_train (no 'label' column) -- confirm against the data.
X_test = test_data

params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05,
}

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# One array of test-set positive-class probabilities per fold model.
test_predictions = []
for train_index, val_index in kfold.split(X_train):
    X_train_fold = X_train.iloc[train_index]
    y_train_fold = y_train.iloc[train_index]

    model = lgb.LGBMClassifier(**params)
    model.fit(X_train_fold, y_train_fold)

    # Use probabilities rather than model.predict(): predict() returns hard
    # class labels, so averaging and thresholding them would not act on
    # probabilities. Column 1 is the probability of the second entry of
    # model.classes_.
    # NOTE(review): presumably labels are 0/1, so column 1 is P(label == 1)
    # -- confirm.
    fold_test_probabilities = model.predict_proba(X_test)[:, 1]
    test_predictions.append(fold_test_probabilities)

# Average the per-fold probabilities, then threshold at 0.5 to get 0/1 labels.
final_predictions = sum(test_predictions) / len(test_predictions)
final_predictions = (final_predictions > 0.5).astype(int)

y_test_predictions = final_predictions
print(y_test_predictions)