# Bank deposit competition: baseline model.

import warnings
# Installed before the remaining imports so warnings raised while importing
# the libraries below are filtered as well.
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
# Load both CSV files and stack them so the one-hot encoding below produces
# the same set of columns for training and test rows.
train = pd.read_csv('data/train_set.csv')
test = pd.read_csv('data/test_set.csv')
data = pd.concat([train, test])

# Every training column except the identifier and the target is a feature.
feature = train.columns.tolist()
feature.remove('ID')
feature.remove('y')
object_columns = train.columns[train.dtypes == 'object'].tolist()
num_columns = list(set(feature) - set(object_columns))
print(object_columns)
print(num_columns)

# Replace each object-typed column with its one-hot indicator columns.
for col in object_columns:
    data = pd.concat([data, pd.get_dummies(data[col], prefix=col+'_')], axis=1)
    data.drop(col, axis=1, inplace=True)

# Rows with a target value are training rows; rows without one are test rows.
# (assumes test_set.csv has no populated `y` column - TODO confirm)
# .copy() makes each split an independent frame, so the in-place drops and
# column assignments below do not operate on a slice of `data`.
X_train = data[data['y'].notnull()].copy()
X_test = data[data['y'].isnull()].copy()

y_train = X_train['y']
X_train.drop(['ID', 'y'], axis=1, inplace=True)
# Keep the test IDs for the submission file before they are dropped.
result = pd.DataFrame({'ID': X_test['ID']})
X_test.drop(['ID', 'y'], axis=1, inplace=True)

# Standardise the numeric columns using statistics of the training rows only.
# NOTE: the scaler is fit before the train/validation split below, so the
# validation rows also contribute to the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train[num_columns])
X_train[num_columns] = scaler.transform(X_train[num_columns])
X_test[num_columns] = scaler.transform(X_test[num_columns])

# Hold out 20% of the labelled rows for validation / early stopping.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Wrap the splits in LightGBM's Dataset format.
# Tip from the original author: saving a Dataset to a LightGBM binary file
# makes later loading faster (not done here).
lgb_train = lgb.Dataset(X_train, y_train)
lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)  # validation data

# Training parameters, written as a dictionary.
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'auc'},
    'learning_rate': 0.01,
    'is_unbalance': True,
    'random_state': 0,
    'verbose': 0
}

print('Start training…')
# Train for up to 10000 rounds, logging every 100 rounds and stopping once
# the validation metric has not improved for 100 rounds.
# NOTE(review): LightGBM >= 4.0 no longer accepts `verbose_eval` and
# `early_stopping_rounds` here; on those versions pass
# callbacks=[lgb.early_stopping(100), lgb.log_evaluation(100)] instead.
clf = lgb.train(params,
                lgb_train,
                valid_sets=lgb_val,
                num_boost_round=10000,
                verbose_eval=100,
                early_stopping_rounds=100)

print('Start predicting…')

# Predict on the validation split. Because early stopping was enabled during
# training, best_iteration selects the best round rather than the last one.
y_pred = clf.predict(X_val, num_iteration=clf.best_iteration)

# Evaluate the model on the validation split.
print(roc_auc_score(y_val, y_pred))

# Score the test rows with the same iteration count as the validation
# prediction and write the submission file.
y_test = clf.predict(X_test, num_iteration=clf.best_iteration)
result['pred'] = y_test
print(result.head())
result.to_csv('data/submission.csv', index=False)

# (Web-page residue from the source blog post - like/favourite/comment
# counters, a feedback survey, and payment-dialog text - removed; it was not
# part of the script.)