import json
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error
print('Loading data...')
# note: the file paths must be ASCII-only; pd.read_csv raises an error if the path contains Chinese characters
df_train = pd.read_csv('D:/2345download/LightGBM-master/examples/regression/regression.train', header=None, sep='\t')
df_test = pd.read_csv('D:/2345download/LightGBM-master/examples/regression/regression.test', header=None, sep='\t')
y_train = df_train[0].values
y_test = df_test[0].values
X_train = df_train.drop(0,axis=1).values
X_test = df_test.drop(0,axis=1).values
# create dataset for lightgbm
# lgb.Dataset accepts numpy arrays as well as pandas DataFrame/Series; here the .values arrays are passed in
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}
print('Starting training...')
# train with early stopping on the validation set
# (early_stopping_rounds works on older LightGBM; LightGBM >= 4.0 uses callbacks=[lgb.early_stopping(5)] instead)
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5)
print('Saving model...')
gbm.save_model('model.txt')
print('Starting prediction...')
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# evaluate
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
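Since the booster is written to model.txt above, here is a minimal sketch of loading it back and predicting with the reloaded model (this reload step is an illustrative addition, not part of the original example):
# reload the saved booster from disk and predict with it
bst = lgb.Booster(model_file='model.txt')
y_pred_loaded = bst.predict(X_test)
print('The rmse from the reloaded model is:', mean_squared_error(y_test, y_pred_loaded) ** 0.5)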
Notes: this is the simple_example code from the official LightGBM repository, which trains a GBDT model. GBDT has many parameters, so here is a summary of the main ones:
'num_leaves': 31 — the maximum number of leaves per tree; this controls model complexity (it is not the tree depth, which is limited separately via max_depth)
'bagging_fraction': 0.8 and 'bagging_freq': 5 — sample 80% of the training rows for bagging, re-sampling every 5 boosting iterations
'learning_rate': 0.05 — the shrinkage applied to each new tree's contribution; smaller values generally need more boosting rounds
'feature_fraction': 0.9 — randomly use 90% of the features when building each tree
The remaining keys (task, objective, metric, verbose) select the training task, the loss function, the evaluation metrics, and the logging level; the annotated dict below collects these in one place.
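For reference, the same params dict with a one-line comment per key (same values as the example above; the comments are explanatory additions):
# the simple_example parameters, annotated
params = {
    'task': 'train',              # training task
    'boosting_type': 'gbdt',      # gradient boosted decision trees
    'objective': 'regression',    # L2 regression loss
    'metric': {'l2', 'auc'},      # evaluation metrics (auc is only meaningful for binary 0/1 labels)
    'num_leaves': 31,             # maximum number of leaves per tree
    'learning_rate': 0.05,        # shrinkage applied to each new tree
    'feature_fraction': 0.9,      # fraction of features sampled for each tree
    'bagging_fraction': 0.8,      # fraction of rows sampled for bagging
    'bagging_freq': 5,            # perform bagging every 5 iterations
    'verbose': 0                  # logging verbosity
}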
Using this to predict the iris dataset gives 100% accuracy on the held-out samples (after rounding the continuous regression outputs to the nearest class label, as checked at the end)!
from sklearn import datasets
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import numpy as np
data = datasets.load_iris()
# take the first 40 samples of each class for training and the last 10 of each class for testing
X_train = np.vstack((data['data'][0:40], data['data'][50:90], data['data'][100:140])).tolist()
y_train = np.hstack((data['target'][0:40], data['target'][50:90], data['target'][100:140])).tolist()
X_test = np.vstack((data['data'][40:50], data['data'][90:100], data['data'][140:150])).tolist()
y_test = np.hstack((data['target'][40:50], data['target'][90:100], data['target'][140:150])).tolist()
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',   # regression on the class labels; predictions are rounded to classes below
    'metric': {'l2'},            # drop 'auc' here: the iris labels are 0/1/2 and auc expects binary labels
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}
print('Starting training...')
# train with early stopping on the validation set
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5)
print('Saving model...')
gbm.save_model('model.txt')
print('Starting prediction...')
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# evaluate
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
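Because the objective is regression, y_pred holds continuous values; to verify the 100% accuracy claim above, round each prediction to the nearest class label and compare with y_test (this check is an illustrative addition, not part of the original script):
# round the continuous regression outputs to the nearest class label (clipped to 0..2) and measure accuracy
y_pred_class = np.clip(np.rint(y_pred), 0, 2).astype(int)
accuracy = np.mean(y_pred_class == np.array(y_test))
print('Accuracy after rounding to the nearest class:', accuracy)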