sklearn-xgboost

 调用 xgboost 原生接口(数据预处理配合 sklearn/pandas)实现二分类的完整 demo 如下,包含模型保存和导出。

"""Binary-classification demo using the native xgboost Booster API.

Trains on the first 16000 rows of a 20000-row feature/label CSV pair,
validates on the last 4000 rows, predicts on a separate test set, and
saves the trained model, a feature map, a JSON model dump, and the
predictions (submission CSV + result CSV).
"""
import csv
import os
import time

import numpy as np
import pandas as pd
import xgboost as xgb

# Record start time so total runtime can be reported at the end.
start_time = time.time()

# ---- Load data -------------------------------------------------------------
# NOTE(review): the original script read training features from
# "/Users/XXX/filter_feature_20000.csv" but validation features from
# ".../Downloads/filter_feature_20000.csv" — presumably the same file
# (train = first 16000 rows, val = last 4000 rows); unified here.
feature_path = "/Users/XXX/Downloads/filter_feature_20000.csv"
label_path = "/Users/XXX/Downloads/label_new_20000.csv"

# Training split: first 16000 rows.
X = pd.read_csv(feature_path, header=0, nrows=16000).values
y = pd.read_csv(label_path, header=0, nrows=16000).values

# Validation split: last 4000 rows of the same files.
val_X = pd.read_csv(feature_path, header=0).tail(4000).values
val_y = pd.read_csv(label_path, header=0).tail(4000).values

# Test set (features only, no labels).
test_X = pd.read_csv("/Users/XXX/Downloads/filter_feature_100000.csv", header=0).values

# Feature column names, needed later to build the feature-map file.
# (The original referenced an undefined `features`; read the CSV header.)
features = pd.read_csv(feature_path, header=0, nrows=0).columns.tolist()

params = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'gamma': 0,             # min loss reduction to split; larger = more conservative
    'max_depth': 6,         # tree depth; deeper trees overfit more easily
    'lambda': 1,            # L2 regularization on leaf weights; larger = less overfitting
    'subsample': 1,         # row subsampling ratio per tree
    'colsample_bytree': 1,  # column subsampling ratio per tree
    'min_child_weight': 1,
    'silent': 0,
    'eta': 0.2,             # learning rate
    'seed': 1000,
    'eval_metric': 'auc',
    'nthread': 3,           # number of CPU threads
}

num_rounds = 500  # maximum number of boosting rounds

xgb_train = xgb.DMatrix(X, label=y)
xgb_val = xgb.DMatrix(val_X, label=val_y)
xgb_test = xgb.DMatrix(test_X)

watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]

# Train the model. early_stopping_rounds stops training when the validation
# AUC has not improved for 100 consecutive rounds.
model = xgb.train(params, xgb_train, num_rounds, watchlist,
                  early_stopping_rounds=100)

model.save_model('./xgb.model')  # persist the trained booster

print("best best_ntree_limit", model.best_ntree_limit)

# ---- Feature map -----------------------------------------------------------
# featmap.txt format: <feature id>\t<feature name>\t<type>
#   feature id: 0-based, ascending
#   i   = indicator (binary) feature
#   q   = quantitative feature (e.g. age, time)
#   int = integer feature (decision boundaries become integers)
file_name = 'feature_map.txt'
with open(file_name, 'w') as outfile:
    for i, feat in enumerate(features):
        # 'q' marks every column as quantitative; use 'i' for indicators.
        outfile.write('{0}\t{1}\tq\n'.format(i, feat))

# Dump the model as JSON (with per-node statistics) using the feature map.
model.dump_model('xgboost_model', file_name, with_stats=True,
                 dump_format='json')

# Report elapsed time so far.
cost_time = time.time() - start_time
print("xgboost success!", '\n', "cost time:", cost_time, "(s)")

# ---- Predict ---------------------------------------------------------------
# Predict with the best iteration found by early stopping.
preds = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)

# preds are probabilities in [0, 1]; the original fmt='%d' would truncate
# them all to 0, so use a float-friendly format for the prediction column.
np.savetxt('xgb_submission.csv',
           np.c_[range(1, len(preds) + 1), preds],
           delimiter=',', header='ID,y', comments='', fmt='%g')

# Also write a one-column CSV with just the predictions.
current_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(current_dir, 'result.csv'), 'w',
          encoding='utf8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["pre_y"])                       # column header
    writer.writerows([p] for p in preds.tolist())    # one prediction per row

# Total runtime.
cost_time = time.time() - start_time
print("xgboost success!", '\n', "cost time:", cost_time, "(s)")

模型导出结果为txt如下:

booster[0]:
0:[c33<0.5] yes=1,no=2,missing=1,gain=659.56543,cover=4000
	1:[c1272<0.5] yes=3,no=4,missing=3,gain=81.151062,cover=1080.5
		3:[c3<0.5] yes=7,no=8,missing=7,gain=41.0314331,cover=865
			7:[c1712<0.5] yes=15,no=16,missing=15,gain=21.5354309,cover=731
				15:[c2133<0.5] yes=31,no=32,missing=31,gain=17.3103333,cover=553.75
					31:[c1768<0.5] yes=59,no=60,missing=59,gain=10.9403687,cover=465
						59:leaf=0.140259743,cover=422.5
						60:leaf=0.248275861,cover=42.5
					32:[c1449<0.5] yes=61,no=62,missing=61,gain=5.55184937,cover=88.75
						61:leaf=0.271571904,cover=73.75
						62:leaf=0.125,cover=15
				16:[c2038<0.5] yes=33,no=34,missing=33,gain=22.4194565,cover=177.25
					33:[c1354<0.5] yes=63,no=64,missing=63,gain=13.6695414,cover=94.5
						63:leaf=-0.0195439737,cover=75.75
						64:leaf=0.167088613,cover=18.75
					34:[c29<0.5] yes=65,no=66,missing=65,gain=16.6235809,cover=82.75
						65:leaf=0.0774193555,cover=45.5
						66:leaf=0.258823544,cover=37.25
			8:[c1506<0.5] yes=17,no=18,missing=17,gain=12.3612061,cover=134
				17:[c1271<0.5] yes=35,no=36,missing=35,gain=8.44068623,cover=114.25
					35:[c1716<0.5] yes=67,no=68,missing=67,gain=6.99994373,cover=69.5
						67:leaf=-0.0120171672,cover=57.25
						68:leaf=-0.173584908,cover=12.25
					36:[c2248<0.5] yes=69,no=70,missing=69,gain=9.48196793,cover=44.75
						69:leaf=0.0993939415,cover=40.25
						70:leaf=-0.181818187,cover=4.5
				18:[c1140<0.5] yes=37,no=38,missing=37,gain=3.07007694,cover=19.75
					37:[c2268<0.5] yes=71,no=72,missing=71,gain=2.29551125,cover=18.75
						71:leaf=0.205405399,cover=17.5
						72:leaf=-0.0444444455,cover=1.25
					38:leaf=-0.100000001,cover=1
		4:[c697<0.5] yes=9,no=10,missing=9,gain=6.76394653,cover=215.5
			9:[c779<0.5] yes=19,no=20,missing=19,gain=5.40783691,cover=197.5
				19:[c1938<0.5] yes=39,no=40,missing=39,gain=4.21405029,cover=190
					39:[c1679<0.5] yes=73,no=74,missing=73,gain=5.22348022,cover=185.5
						73:leaf=0.291745603,cover=183.75
						74:leaf=-0.0363636389,cover=1.75
					40:[c13<0.5] yes=75,no=76,missing=75,gain=4.87272739,cover=4.5
						75:leaf=0.200000003,cover=3
						76:leaf=-0.160000011,cover=1.5
				20:[c1118<0.5] yes=41,no=42,missing=41,gain=7.13214016,cover=7.5
					41:[c34<0.5] yes=77,no=78,missing=77,gain=3.28787899,cover=2.75
						77:leaf=0.100000001,cover=1
						78:leaf=-0.25454545,cover=1.75
					42:[c1329<0.5] yes=79,no=80,missing=79,gain=1.743083,cover=4.75
						79:leaf=0.300000012,cover=3
						80:leaf=0.0363636389,cover=1.75
			10:[c1603<0.5] yes=21,no=22,missing=21,gain=7.41825104,cover=18
				21:[c1169<0.5] yes=43,no=44,missing=43,gain=5.17241478,cover=16.75
					43:[c91<0.5] yes=81,no=82,missing=81,gain=5.05558014,cover=13.75
						81:leaf=0.264150947,cover=12.25
						82:leaf=-0.0800000057,cover=1.5
					44:[c1354<0.5] yes=83,no=84,missing=83,gain=3.34595942,cover=3
						83:leaf=0.109090917,cover=1.75
						84:leaf=-0.222222239,cover=1.25
				22:leaf=-0.222222239,cover=1.25
	2:[c1272<0.5] yes=5,no=6,missing=5,gain=277.877716,cover=2919.5
		5:[c2196<0.5] yes=11,no=12,missing=11,gain=126.877335,cover=2655.75
			11:[c1712<0.5] yes=23,no=24,missing=23,gain=37.1381226,cover=1669.5
				23:[c3<0.5] yes=45,no=46,missing=45,gain=33.6924438,cover=1266.75
					45:[c927<0.5] yes=85,no=86,missing=85,gain=23.6303349,cover=653.25
						85:leaf=-0.0463382155,cover=562.25
						86:leaf=0.0630434826,cover=91
					46:[c1273<0.5] yes=87,no=88,missing=87,gain=27.3742218,cover=613.5
						87:leaf=-0.107198611,cover=575.5
						88:leaf=0.0666666701,cover=38
				24:[c1290<0.5] yes=47,no=48,missing=47,gain=13.1829071,cover=402.75
					47:[c7<0.5] yes=89,no=90,missing=89,gain=10.1228333,cover=394.75
						89:leaf=-0.152526796,cover=325.5
						90:leaf=-0.0669039115,cover=69.25
					48:[c26<0.5] yes=91,no=92,missing=91,gain=6.54553127,cover=8
						91:leaf=0.273684233,cover=3.75
						92:leaf=-0.0571428612,cover=4.25
			12:[c927<0.5] yes=25,no=26,missing=25,gain=39.9882202,cover=986.25
				25:[c12<0.5] yes=49,no=50,missing=49,gain=22.2378578,cover=754.25
					49:[c1337<0.5] yes=93,no=94,missing=93,gain=15.014286,cover=141.25
						93:leaf=0.0289361719,cover=116.5
						94:leaf=0.198058248,cover=24.75
					50:[c2023<0.5] yes=95,no=96,missing=95,gain=18.6097565,cover=613
						95:leaf=-0.0151431207,cover=540.5
						96:leaf=-0.122448981,cover=72.5
				26:[c1719<0.5] yes=51,no=52,missing=51,gain=13.9255447,cover=232
					51:[c2153<0.5] yes=97,no=98,missing=97,gain=9.67037582,cover=198.75
						97:leaf=0.0775453299,cover=178.25
						98:leaf=-0.0651162788,cover=20.5
					52:[c1315<0.5] yes=99,no=100,missing=99,gain=13.5209045,cover=33.25
						99:leaf=0.266666681,cover=26
						100:leaf=-0.0363636389,cover=7.25
		6:[c549<0.5] yes=13,no=14,missing=13,gain=17.6135254,cover=263.75
			13:[c118<0.5] yes=27,no=28,missing=27,gain=8.44517517,cover=245.75
				27:[c1290<0.5] yes=53,no=54,missing=53,gain=7.69490051,cover=244.5
					53:[c2059<0.5] yes=101,no=102,missing=101,gain=6.32978821,cover=170
						101:leaf=0.16711916,cover=164.75
						102:leaf=-0.0480000004,cover=5.25
					54:[c293<0.5] yes=103,no=104,missing=103,gain=6.55356598,cover=74.5
						103:leaf=0.249158248,cover=73.25
						104:leaf=-0.13333334,cover=1.25
				28:leaf=-0.222222239,cover=1.25
			14:[c2189<0.5] yes=29,no=30,missing=29,gain=4.91767883,cover=18
				29:[c2091<0.5] yes=55,no=56,missing=55,gain=6.63676596,cover=15.25
					55:leaf=0.24000001,cover=1.5
					56:[c1302<0.5] yes=105,no=106,missing=105,gain=7.41748428,cover=13.75
						105:leaf=-0.200000003,cover=10
						106:leaf=0.105263166,cover=3.75
				30:[c1290<0.5] yes=57,no=58,missing=57,gain=1.18787885,cover=2.75
					57:leaf=0.25454545,cover=1.75
					58:leaf=-0,cover=1
booster[1]:
0:[c33<0.5] yes=1,no=2,missing=1,gain=427.845093,cover=3982.50781
	1:[c1272<0.5] yes=3,no=4,missing=3,gain=54.8223572,cover=1070.89758
		3:[c2196<0.5] yes=7,no=8,missing=7,gain=30.242691,cover=859.639832
			7:[c1271<0.5] yes=15,no=16,missing=15,gain=13.0224113,cover=281.554596
				15:[c1475<0.5] yes=31,no=32,missing=31,gain=13.6810684,cover=146.456284
					31:[c2076<0.5] yes=57,no=58,missing=57,gain=10.0567083,cover=96.4491196
						57:leaf=0.0157024488,cover=59.3216629
						58:leaf=-0.11572402,cover=37.1274529
					32:[c1618<0.5] yes=59,no=60,missing=59,gain=4.72074223,cover=50.0071716
						59:leaf=0.0694069788,cover=45.5354538
						60:leaf=0.270091563,cover=4.4717207
				16:[c1447<0.5] yes=33,no=34,missing=33,gain=7.08228302,cover=135.098312
					33:[c1315<0.5] yes=61,no=62,missing=61,gain=6.59377289,cover=133.609009
						61:leaf=0.121101692,cover=108.705887
						62:leaf=0.00682065869,cover=24.9031239
					34:leaf=-0.245720625,cover=1.48929155
			8:[c1952<0.5] yes=17,no=18,missing=17,gain=13.0849762,cover=578.085205
				17:[c481<0.5] yes=35,no=36,missing=35,gain=12.0555115,cover=435.25177
					35:[c32<0.5] yes=63,no=64,missing=63,gain=11.2629852,cover=390.296692
						63:leaf=0.10475228,cover=303.488831
						64:leaf=0.187419161,cover=86.807869
					36:[c668<0.5] yes=65,no=66,missing=65,gain=7.5410738,cover=44.9550896
						65:leaf=-0.0259418767,cover=36.7734833
						66:leaf=0.176206753,cover=8.18160725
				18:[c2057<0.5] yes=37,no=38,missing=37,gain=6.20710754,cover=142.83345
					37:[c63<0.5] yes=67,no=68,missing=67,gain=3.84857941,cover=44.1217384
						67:leaf=0.257277936,cover=42.8795776
						68:leaf=-0.0592717826,cover=1.24215901
					38:[c85<0.5] yes=69,no=70,missing=69,gain=7.20003128,cover=98.7117157
						69:leaf=0.161803916,cover=95.7366409
						70:leaf=-0.123544648,cover=2.97508097
		4:[c9<0.5] yes=9,no=10,missing=9,gain=5.99909973,cover=211.257812
			9:[c744<0.5] yes=19,no=20,missing=19,gain=5.7590332,cover=208.557236
				19:[c1680<0.5] yes=39,no=40,missing=39,gain=5.66558838,cover=200.946335
					39:[c1938<0.5] yes=71,no=72,missing=71,gain=5.28115845,cover=197.488068
						71:leaf=0.237334177,cover=193.272552
						72:leaf=0.0088988198,cover=4.21552372
					40:[c1992<0.5] yes=73,no=74,missing=73,gain=3.19810057,cover=3.45826054
						73:leaf=-0.156537995,cover=1.97446072
						74:leaf=0.151286513,cover=1.48379982
				20:[c2267<0.5] yes=41,no=42,missing=41,gain=5.3044486,cover=7.61089611
					41:leaf=-0.232033879,cover=1.22718644
					42:[c2243<0.5] yes=75,no=76,missing=75,gain=4.38424873,cover=6.38370943
						75:leaf=0.197442099,cover=5.14841366
						76:leaf=-0.141550943,cover=1.23529589
			10:[c2040<0.5] yes=21,no=22,missing=21,gain=2.46140671,cover=2.70058417
				21:leaf=0.107783794,cover=1.22893667
				22:leaf=-0.186112493,cover=1.47164738
	2:[c1272<0.5] yes=5,no=6,missing=5,gain=179.928802,cover=2911.61011
		5:[c2196<0.5] yes=11,no=12,missing=11,gain=81.6617813,cover=2650.31323
			11:[c1273<0.5] yes=23,no=24,missing=23,gain=25.5287323,cover=1665.37952
				23:[c1271<0.5] yes=43,no=44,missing=43,gain=39.3595123,cover=1561.60522
					43:[c2057<0.5] yes=77,no=78,missing=77,gain=19.5219879,cover=763.56073
						77:leaf=-0.0642930269,cover=315.744019
						78:leaf=-0.129576445,cover=447.816742
					44:[c2036<0.5] yes=79,no=80,missing=79,gain=31.8942204,cover=798.044434
						79:leaf=-0.0159163754,cover=597.888855
						80:leaf=-0.108017288,cover=200.155563
				24:[c1274<0.5] yes=45,no=46,missing=45,gain=9.23001671,cover=103.774307
					45:[c1712<0.5] yes=81,no=82,missing=81,gain=8.56442451,cover=97.0483093
						81:leaf=0.0528198071,cover=68.9394989
						82:leaf=-0.0763231069,cover=28.1088161
					46:[c1394<0.5] yes=83,no=84,missing=83,gain=1.092803,cover=6.72599506
						83:leaf=0.290282816,cover=4.7329421
						84:leaf=0.0696449354,cover=1.9930532
			12:[c29<0.5] yes=25,no=26,missing=25,gain=28.1107922,cover=984.933838
				25:[c927<0.5] yes=47,no=48,missing=47,gain=15.08564,cover=708.926697
					47:[c1315<0.5] yes=85,no=86,missing=85,gain=14.1605644,cover=555.629639
						85:leaf=-0.0119863721,cover=448.730988
						86:leaf=-0.0927194133,cover=106.898651
					48:[c2252<0.5] yes=87,no=88,missing=87,gain=8.26338768,cover=153.297028
						87:leaf=0.0316573568,cover=145.855804
						88:leaf=0.235712335,cover=7.44123077
				26:[c2023<0.5] yes=49,no=50,missing=49,gain=12.1558895,cover=276.007141
					49:[c1938<0.5] yes=89,no=90,missing=89,gain=11.8624725,cover=247.600189
						89:leaf=0.0864320472,cover=236.885422
						90:leaf=-0.120999791,cover=10.7147703
					50:[c1712<0.5] yes=91,no=92,missing=91,gain=5.94400311,cover=28.4069595
						91:leaf=0.019198468,cover=16.4460449
						92:leaf=-0.160894528,cover=11.9609146
		6:[c544<0.5] yes=13,no=14,missing=13,gain=12.1526718,cover=261.296814
			13:[c2032<0.5] yes=27,no=28,missing=27,gain=6.42970276,cover=246.43045
				27:[c118<0.5] yes=51,no=52,missing=51,gain=6.42436218,cover=235.034409
					51:[c1671<0.5] yes=93,no=94,missing=93,gain=6.46975708,cover=233.799713
						93:leaf=0.166887745,cover=215.226166
						94:leaf=0.0412377529,cover=18.5735512
					52:leaf=-0.198985606,cover=1.234694
				28:[c2196<0.5] yes=53,no=54,missing=53,gain=7.55905104,cover=11.3960485
					53:leaf=-0.253086418,cover=2.48001337
					54:[c2272<0.5] yes=95,no=96,missing=95,gain=5.57294369,cover=8.91603565
						95:leaf=0.170312315,cover=6.93561363
						96:leaf=-0.155639395,cover=1.98042178
			14:[c1057<0.5] yes=29,no=30,missing=29,gain=5.63266706,cover=14.8663702
				29:[c793<0.5] yes=55,no=56,missing=55,gain=5.00396013,cover=11.8914061
					55:[c2034<0.5] yes=97,no=98,missing=97,gain=4.62844086,cover=8.66823292
						97:leaf=-0.0177141353,cover=5.20225906
						98:leaf=0.256727427,cover=3.46597409
					56:[c606<0.5] yes=99,no=100,missing=99,gain=1.0299964,cover=3.22317266
						99:leaf=-0.237570032,cover=1.48697209
						100:leaf=-0.0221777,cover=1.73620045
				30:leaf=-0.24320662,cover=2.97496462

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值