内置方式建模要把数据读取成Dataset格式
lgb.train去训练
# coding: utf-8
import json
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error
# 加载数据集合
print('加载数据...')
df_train = pd.read_csv('../data/regression.train.txt', header=None, sep='\t')
df_test = pd.read_csv('../data/regression.test.txt', header=None, sep='\t')
# 设定训练集和测试集
y_train = df_train[0].values
y_test = df_test[0].values
X_train = df_train.drop(0, axis=1).values
X_test = df_test.drop(0, axis=1).values
# 构建lgb中的Dataset格式
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# 敲定好一组参数
params = {
'task': 'train',
'boosting_type': 'gbdt',
'objective': 'regression',
'metric': {'l2', 'auc'},
'num_leaves': 31,
'learning_rate': 0.05,
'feature_fraction': 0.9,
'bagging_fraction': 0.8,
'bagging_freq': 5,
'verbose': 0
}
print('开始训练...')
# 训练
gbm = lgb.train(params,
lgb_train,
num_boost_round=20,
valid_sets=lgb_eval,
early_stopping_rounds=5)
# 保存模型
print('保存模型...')
# 保存模型到文件中
gbm.save_model('../../tmp/model.txt')
print('开始预测...')
# 预测
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# 评估
print('预估结果的rmse为:')
print(mean_squared_error(y_test, y_pred) ** 0.5)添加样本权重训练
# coding: utf-8
import json
import lightgbm as lgb
import pandas as pd
import num