Two Ways to Write LightGBM (LGB) Code
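
LightGBM can be driven through two interfaces: its native lgb.train API with lgb.Dataset, and its scikit-learn-compatible LGBMClassifier wrapper. Both methods below run the same 5-fold cross-validation pipeline for a binary classification task and average the per-fold test-set predictions.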

Method 1: the native lgb.train API

import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Load the training and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Split features and label
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']

# Create the full-data LightGBM Dataset (the loop below builds its own per-fold datasets)
train_dataset = lgb.Dataset(X_train, label=y_train)

# Set the model parameters
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05
}

# Define the k-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Test-set features (assumes the test set has the same columns as the training features)
X_test = test_data

# Store each fold's test-set predictions
test_predictions = []

# Run the k-fold cross-validation
for train_index, val_index in kfold.split(X_train):
    # Split into training and validation folds
    X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    # Create the LightGBM datasets for the current fold
    train_fold_dataset = lgb.Dataset(X_train_fold, label=y_train_fold)
    val_fold_dataset = lgb.Dataset(X_val_fold, label=y_val_fold)

    # Train the model (the early_stopping_rounds/verbose_eval keyword arguments
    # were removed in LightGBM 4.0; use callbacks instead)
    model = lgb.train(
        params,
        train_fold_dataset,
        num_boost_round=100,
        valid_sets=[val_fold_dataset],
        callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)],
    )

    # Evaluate on the validation fold
    val_predictions = model.predict(X_val_fold, num_iteration=model.best_iteration)
    val_accuracy = accuracy_score(y_val_fold, (val_predictions > 0.5).astype(int))
    print(f'Fold validation accuracy: {val_accuracy:.4f}')

    # Predict on the test set with this fold's model; averaging the folds'
    # test-set predictions (rather than their validation predictions, which
    # cover different rows) ensembles all k models
    test_predictions.append(model.predict(X_test, num_iteration=model.best_iteration))

# Average the k folds' test-set predictions
final_predictions = sum(test_predictions) / len(test_predictions)

# Threshold the averaged probabilities for binary classification
y_test_predictions = (final_predictions > 0.5).astype(int)

# Output the test-set predictions
print(y_test_predictions)
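
As a side note, LightGBM also ships a built-in cross-validation helper, lgb.cv, which can replace the manual KFold loop when you only need per-round validation metrics rather than test-set predictions. A minimal sketch, reusing the params and train_dataset defined above (note that the exact metric key names in the returned dict vary across LightGBM versions):

cv_results = lgb.cv(
    params,
    train_dataset,
    num_boost_round=100,
    nfold=5,
    seed=42,
    callbacks=[lgb.early_stopping(10)],
)

# cv_results maps metric names to per-round mean/std lists; the list length
# is the number of boosting rounds kept after early stopping
print(len(next(iter(cv_results.values()))))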

Method 2: the scikit-learn wrapper (LGBMClassifier)

import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Load the training and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Split features and label
X_train = train_data.drop('label', axis=1)
y_train = train_data['label']

# Define the model parameters
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05
}

# Define the k-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Test-set features (assumes the test set has the same columns as the training features)
X_test = test_data

# Store each fold's test-set predictions
test_predictions = []

# Run the k-fold cross-validation
for train_index, val_index in kfold.split(X_train):
    # Split into training and validation folds
    X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    # Create the LightGBM model
    model = lgb.LGBMClassifier(**params)

    # Train the model
    model.fit(X_train_fold, y_train_fold)

    # Evaluate on the validation fold
    val_predictions = model.predict(X_val_fold)
    print(f'Fold validation accuracy: {accuracy_score(y_val_fold, val_predictions):.4f}')

    # Predict class-1 probabilities on the test set with this fold's model;
    # averaging probabilities (rather than the hard 0/1 labels that predict()
    # returns) keeps the threshold step below meaningful
    test_predictions.append(model.predict_proba(X_test)[:, 1])

# Average the k folds' test-set probabilities
final_predictions = sum(test_predictions) / len(test_predictions)

# Threshold the averaged probabilities for binary classification
y_test_predictions = (final_predictions > 0.5).astype(int)

# Output the test-set predictions
print(y_test_predictions)
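
Unlike Method 1, the loop above trains every fold for the wrapper's default number of iterations with no early stopping. The scikit-learn wrapper supports early stopping too, via the eval_set and callbacks arguments of fit. A minimal sketch of how the fit call inside the loop could look, using the same fold variables as above:

model = lgb.LGBMClassifier(**params, n_estimators=100)
model.fit(
    X_train_fold,
    y_train_fold,
    eval_set=[(X_val_fold, y_val_fold)],
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)],
)
# After fitting, model.best_iteration_ holds the selected number of rounds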