XGBoost
#!/usr/bin/python
import numpy as np
#import scipy.sparse
import pickle
import xgboost as xgb
# Basic example: read data from libsvm-format files and train a binary classifier.
# The data files use the libsvm text format, for example:
#1 3:1 10:1 11:1 21:1 30:1 34:1 36:1 40:1 41:1 53:1 58:1 65:1 69:1 77:1 86:1 88:1 92:1 95:1 102:1 105:1 117:1 124:1
#0 3:1 10:1 20:1 21:1 23:1 34:1 36:1 39:1 41:1 53:1 56:1 65:1 69:1 77:1 86:1 88:1 92:1 95:1 102:1 106:1 116:1 120:1
#0 1:1 10:1 19:1 21:1 24:1 34:1 36:1 39:1 42:1 53:1 56:1 65:1 69:1 77:1 86:1 88:1 92:1 95:1 102:1 106:1 116:1 122:1
dtrain = xgb.DMatrix('./data/agaricus.txt.train')
dtest = xgb.DMatrix('./data/agaricus.txt.test')
# Hyperparameter settings.
# NOTE(review): 'silent' is deprecated in favor of 'verbosity' in newer
# XGBoost releases -- confirm against the installed version before changing.
param = {
    'max_depth': 2,
    'eta': 1,
    'silent': 1,
    'objective': 'binary:logistic',
}
# Watchlist used to monitor the model's state during training.
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
# Pass the round count and watchlist by keyword: newer XGBoost releases treat
# `evals` as keyword-only, while the keyword form works on older ones too.
bst = xgb.train(param, dtrain, num_boost_round=num_round, evals=watchlist)
# Predict on the test set with the trained model.
preds = bst.predict(dtest)
# Measure the error rate: a prediction above 0.5 counts as class 1, and the
# error rate is the fraction of rows whose thresholded prediction differs
# from the label. The comparison is vectorized instead of an index loop.
labels = dtest.get_label()
error_rate = np.mean((preds > 0.5) != labels)
print('错误类为%f' % error_rate)
# Persist the trained model to disk.
bst.save_model('./model/0001.model')
LightGBM评分卡
from pyecharts.charts import *
from pyecharts import options as opts
from pylab import *
# Display configuration for plots and printed tables.
# Use the SimHei font for sans-serif text (presumably so that Chinese labels
# render in matplotlib figures -- confirm on the target machine).
mpl.rcParams['font.sans-serif'] = ['SimHei']
# Print NumPy floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Turn on both pandas wide-character width options for printed frames.
for _width_option in (
    'display.unicode.ambiguous_as_wide',
    'display.unicode.east_asian_width',
):
    pd.set_option(_width_option, True)
# Dual-axis line chart built from `val_repot`: the BADRATE column is drawn in
# red against the default axis (index 0) and the KS column in blue against a
# second axis placed on the right (index 1).
# NOTE(review): the right-hand axis is named "累计坏人占比" but the series bound
# to it is "KS" -- confirm the intended axis label.
line = (
    Line()
    .add_xaxis(xaxis_data=list(val_repot.index))
    .add_yaxis(
        series_name="分组坏人占比",
        y_axis=list(val_repot.BADRATE),
        yaxis_index=0,
        color="red",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="行为评分卡模型表现"),
    )
    # Add the secondary value axis, fixed to the range [0, 0.5].
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="累计坏人占比",
            type_="value",
            min_=0,
            max_=0.5,
            position="right",
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="red")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value}"),
        )
    )
    # The x-axis data is set again before the second series, as in the
    # original call order.
    .add_xaxis(xaxis_data=list(val_repot.index))
    .add_yaxis(
        series_name="KS",
        y_axis=list(val_repot['KS']),
        yaxis_index=1,
        color="blue",
        label_opts=opts.LabelOpts(is_show=False),
    )
)
# Render the chart inline in a notebook.
line.render_notebook()