def model_xgb(train, test):
    """Train an XGBoost binary classifier on ``train`` and score ``test``.

    Args:
        train: DataFrame with an 'ID', a 'date' and a 'label' column; every
            other column is passed to XGBoost as a feature and 'label' is
            used as the training target.
        test: DataFrame with an 'ID' and a 'date' column; every other column
            is passed to XGBoost as a feature.

    Returns:
        A ``(result, feat_importance)`` tuple:

        * ``result`` -- DataFrame with the 'ID' column of ``test`` and a
          'prob' column holding the model prediction for each row, in the
          row order of ``test``.
        * ``feat_importance`` -- DataFrame with 'feature_name' and
          'importance' columns built from ``model.get_score()``, sorted by
          'importance' in descending order.
    """
    # XGBoost parameters.
    # NOTE(review): newer XGBoost releases replaced 'silent' with
    # 'verbosity' -- confirm against the installed version.
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'silent': 1,
        'eta': 0.01,
        'max_depth': 5,
        'min_child_weight': 1,
        'gamma': 0,
        'lambda': 1,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'subsample': 0.9,
        'scale_pos_weight': 1,
    }

    # Datasets: identifier/date columns (and the label) are not features.
    dtrain = xgb.DMatrix(
        train.drop(['ID', 'date', 'label'], axis=1),
        label=train['label'],
    )
    dtest = xgb.DMatrix(test.drop(['ID', 'date'], axis=1))  # test-set features

    # Training. Only the training set is in the watchlist, so the metric
    # reported during boosting is measured on the training data itself.
    watchlist = [(dtrain, 'train')]
    model = xgb.train(params, dtrain, num_boost_round=4000, evals=watchlist)

    # Prediction.
    predict = pd.DataFrame(model.predict(dtest), columns=['prob'])

    # pd.concat(axis=1) aligns on index labels and `predict` carries a fresh
    # 0..n-1 index, so the ID column is re-indexed the same way to pair each
    # ID with its own prediction even when `test` has a non-default index.
    result = pd.concat(
        [test[['ID']].reset_index(drop=True), predict],
        axis=1,
    )

    # Feature importance: read the scores once so names and values are
    # guaranteed to come from the same mapping.
    scores = model.get_score()
    feat_importance = pd.DataFrame({
        'feature_name': list(scores.keys()),
        'importance': list(scores.values()),
    })
    feat_importance.sort_values(['importance'], ascending=False, inplace=True)

    return result, feat_importance
if __name__ == '__main__':
    # Train the model and score the test set; `train` and `test` must
    # already be defined at module level before this guard runs.
    result, importance = model_xgb(train, test)
    # Final result: write the submission file without the index, with the
    # two columns written under the headers 'ID' and 'label'.
    result.to_csv(r'submission2.csv', index=False, header=['ID', 'label'])
import seaborn as sns
import matplotlib.pyplot as plt
from pyecharts import Bar,Line
# Plot the lower triangle of the Pearson correlation matrix of `train`
# as an annotated heatmap.
plt.figure(figsize=(26, 20))
mcorr = train.corr(method='pearson')
# Boolean mask that hides the upper triangle (diagonal included), since the
# matrix is symmetric. The builtin `bool` is used because the `np.bool`
# alias was removed from NumPy.
mask = np.zeros_like(mcorr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
cmap = sns.diverging_palette(220, 10, as_cmap=True)
g = sns.heatmap(mcorr, mask=mask, cmap=cmap, square=True, annot=True, fmt='0.2f')
plt.show()
def get_simple_feature(label_field):
data = label_field.copy()
data[‘Coupon_id’] = data[‘Coupon_id’].map(int)#将存在的空值转换为int型
data[‘Date_received’] = data[‘Date_received’].map(int)
data[‘cnt’] = 1 #方便特征提取
#返回的特征数据集
feature = data.copy()
#用户领券数
keys = ['User_id'] #主键
prefixs = 'simple_' + '_'.join(keys) + '_' #特征名前缀,由simple和主键组成
pivot = pd.pivot_table(data,index = keys,values = 'cnt',aggfunc = len) #以keys为键,'cnt'为值,使用len统计出现的次数
pivot = pd.DataFrame(pivot).rename(columns = {'cnt' : prefixs + 'receive_cnt'}).reset_index()
feature = pd.merge(feature,pivot,on = keys,how = 'left') #将id列与特征列左连
#用户领取特定优惠券数
keys = ['User_id','Coupon_id'] #主键
prefixs = 'simple_' + '_'.join(keys) + '_'
pivot = pd.pivot_table(data,index = keys,values = 'cnt',aggfunc = len)
pivot = pd.DataFrame(pivot).rename(columns = {'cnt' : prefixs + 'receive_cnt'}).reset_index()
feature = pd.merge(feature,pivot,on = keys,how = 'left')