day6

1.读取预处理后的数据,分割出特征与标签

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score, roc_auc_score
from sklearn.metrics import precision_score  # duplicate kept from original; usable as a scoring function
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.model_selection import train_test_split  # duplicate kept from original
from sklearn.preprocessing import StandardScaler
# Load the preprocessed dataset (GBK-encoded CSV) and separate the
# feature matrix from the binary target column `status`.
data_all = pd.read_csv(r'C:\Users\lxy\Desktop\data.csv', encoding='gbk')
x_feature = [c for c in data_all.columns if c != 'status']
x_val = data_all[x_feature]
y_val = data_all['status']

2.IV函数(WOE/IV 特征筛选)

def calcWOE(dataset, col, target):
    """Compute the Weight of Evidence (WOE) per level of feature `col`.

    Parameters
    ----------
    dataset : pd.DataFrame containing `col` and the binary `target` column
        (target is 1 for "bad", 0 for "good").
    col : str, feature column to bin by its distinct values.
    target : str, name of the binary target column.

    Returns
    -------
    pd.DataFrame indexed by the levels of `col`, with columns
    "bad" (share of bads), "good" (share of goods) and "WOE" = ln(bad/good).

    BUG FIX: the original called undefined names `df` and `log`
    (NameError) — they are `pd.DataFrame` and `np.log`.  The pointless
    `round(x, 100)` calls (no-ops on floats) were dropped.
    """
    grouped = dataset.groupby(col)
    # Per-level total count and count of bads (sum of the 0/1 target),
    # aligned on the level index (equivalent to the original left merge).
    data = pd.DataFrame({
        col: grouped[col].count(),
        target: grouped[target].sum(),
    })
    b_total = data[target].sum()
    total = data[col].sum()
    g_total = total - b_total
    data["bad"] = data[target] / b_total
    data["good"] = (data[col] - data[target]) / g_total
    data["WOE"] = np.log(data["bad"] / data["good"])
    return data.loc[:, ["bad", "good", "WOE"]]

def calcIV(dataset):
    """Return the Information Value (IV) of one feature.

    Parameters
    ----------
    dataset : pd.DataFrame with "bad", "good" and "WOE" columns,
        as produced by calcWOE.

    Returns
    -------
    float, IV = sum((bad - good) * WOE) over the feature's levels.

    Side effect: adds an "IV" column (per-level contribution) to `dataset`,
    as the original did.  A stray debug `print()` was removed and the
    row-wise `apply` replaced by the equivalent vectorized expression.
    """
    dataset["IV"] = (dataset["bad"] - dataset["good"]) * dataset["WOE"]
    return dataset["IV"].sum()



# Screen features by Information Value: keep those with IV > 0.1.
# BUG FIX: the original called undefined `df()` (NameError) — it is
# `pd.DataFrame()` — and used `.any(1)`, whose positional axis argument
# was removed in pandas 2.0 (now `.any(axis=1)`).
col_list = [col for col in data_all.drop(labels=['status'], axis=1)]
data_IV = pd.DataFrame()
fea_iv = []

for col in col_list:
    col_WOE = calcWOE(data_all, col, "status")
    # Drop levels whose WOE is NaN/±inf (all-good or all-bad bins).
    col_WOE = col_WOE[~col_WOE.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
    col_IV = calcIV(col_WOE)
    if col_IV > 0.1:
        data_IV[col] = [col_IV]
        fea_iv.append(col)

data_IV.to_csv('data_IV.csv', index=0)
print(fea_iv)

3.随机森林

# Rank features by random-forest impurity importance; keep the top 20.
# BUG FIX: the original read `rfc.feature_importances_` but `rfc` is
# undefined — the fitted estimator is `rf`.
rf = RandomForestClassifier()
rf.fit(x_val, y_val)
rfc_impc = pd.Series(rf.feature_importances_, index=x_val.columns).sort_values(ascending=False)
fea_gini = rfc_impc[:20].index.tolist()

4.用LR递归特征消除(RFE)筛选20个重要特征

from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
# Logistic regression serves as the RFE base estimator.
model = LogisticRegression()
# Recursive feature elimination keeps the 20 most informative features.
# FIX: pass n_features_to_select by keyword — the positional form is
# removed in scikit-learn >= 1.2 and raises TypeError.
rfe = RFE(model, n_features_to_select=20)
rfe = rfe.fit(x_val, y_val)
# `support_` is a boolean mask over the columns; select the survivors.
col_filter = x_val.columns[rfe.support_]

5.特征融合

# Union of the three selected-feature lists (RF importance, IV, RFE),
# de-duplicated; X_final is the fused feature matrix.
features = list({*fea_gini, *fea_iv, *col_filter})
X_final = data_all[features]

6.评分函数

def scores(model, x_train, y_train, x_test, y_test):
    """Fit `model` on the training split, print accuracy / precision /
    recall / F1 / AUC on the test split, and draw the ROC curve.

    Uses decision_function scores when the estimator provides one,
    otherwise the predicted probability of the positive class.
    """
    fitted = model.fit(x_train, y_train)

    y_pred = fitted.predict(x_test)

    # Continuous score for ranking-based metrics (AUC / ROC).
    if hasattr(model, 'decision_function'):
        y_score = fitted.decision_function(x_test)
    else:
        y_score = fitted.predict_proba(x_test)[:, 1]

    print("准确率", accuracy_score(y_test, y_pred))
    print("精确率", precision_score(y_test, y_pred))
    print("召回率", recall_score(y_test, y_pred))
    print("F1-score", f1_score(y_test, y_pred))
    print("AUC", roc_auc_score(y_test, y_score))

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.title('ROC')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.4f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')

7.结果
1.lr

from sklearn.linear_model import LogisticRegression
# Logistic-regression baseline; fixed random_state for reproducibility.
# NOTE(review): x_train/x_test/y_train/y_test are not created anywhere in
# the visible code — presumably a train_test_split of X_final; confirm upstream.
# scores() refits the model internally, so the explicit fit here is redundant.
lr =LogisticRegression(random_state=2018)
lr.fit(x_train,y_train)
scores(lr,x_train,y_train,x_test,y_test)
准确率 0.7876664330763841
精确率 0.6728395061728395
召回率 0.30362116991643456
F1-score 0.4184261036468331
AUC 0.7771300846087238

2.svm

from sklearn.svm import LinearSVC
# Linear SVM baseline with a fixed seed.
# NOTE(review): x_train/x_test/y_train/y_test are assumed to come from an
# earlier train/test split not shown in this file — confirm upstream.
svm=LinearSVC(random_state=2018)
scores(svm,x_train,y_train,x_test,y_test)
准确率 0.7806587245970568
精确率 0.6597222222222222
召回率 0.2646239554317549
F1-score 0.37773359840954274
AUC 0.779154017088667

3.决策树

from sklearn.tree import DecisionTreeClassifier
# Single decision-tree baseline (default depth), fixed seed.
# NOTE(review): x_train etc. are assumed to exist from an earlier split
# not shown in this file — confirm upstream.
tree=DecisionTreeClassifier(random_state=2018)
scores(tree,x_train,y_train,x_test,y_test)
准确率 0.7028731604765242
精确率 0.4123989218328841
召回率 0.42618384401114207
F1-score 0.4191780821917809
AUC 0.6110319969119381

4.随机森林

from sklearn.ensemble import RandomForestClassifier
# 500-tree random forest; rebinds the earlier `rf` used for feature ranking.
# oob_score=True computes an out-of-bag estimate, though it is never read here.
# NOTE(review): x_train etc. are assumed to exist from an earlier split — confirm.
rf = RandomForestClassifier(n_estimators=500,oob_score=True, random_state=2018)
scores(rf,x_train,y_train,x_test,y_test)
准确率 0.7862648913805186
精确率 0.68
召回率 0.2841225626740947
F1-score 0.4007858546168958
AUC 0.7648691225105109

5.lgb

import lightgbm as lgb
# LightGBM baseline with default hyperparameters, fixed seed.
# NOTE(review): x_train etc. are assumed to exist from an earlier split — confirm.
lgb_model = lgb.LGBMClassifier(boosting_type='GBDT',random_state=2018)
scores(lgb_model,x_train,y_train,x_test,y_test)
准确率 0.7764540995094604
精确率 0.5925925925925926
召回率 0.3565459610027855
F1-score 0.44521739130434784
AUC 0.7679624007594964

6.xgb

from xgboost.sklearn import XGBClassifier
# XGBoost baseline with default hyperparameters, fixed seed.
# NOTE(review): x_train etc. are assumed to exist from an earlier split — confirm.
xgbc = XGBClassifier(random_state=2018)
scores(xgbc,x_train,y_train,x_test,y_test)
准确率 0.7876664330763841
精确率 0.6386138613861386
召回率 0.3593314763231198
F1-score 0.45989304812834225
AUC 0.7723336776105078

7.gbdt

from sklearn.ensemble import GradientBoostingClassifier
# Gradient-boosting baseline with default hyperparameters, fixed seed.
# NOTE(review): x_train etc. are assumed to exist from an earlier split — confirm.
gbdt = GradientBoostingClassifier(random_state=2018)
scores(gbdt,x_train,y_train,x_test,y_test)
准确率 0.7820602662929222
精确率 0.6153846153846154
召回率 0.3565459610027855
F1-score 0.45149911816578486
AUC 0.7691725871908027
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\Parser.js:437 throw err; // Rethrow non-MySQL errors ^ Error: secretOrPrivateKey must have a value at module.exports [as sign] (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\jsonwebtoken\sign.js:107:20) at Query.<anonymous> (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\router_handler\2user.js:49:26) at Query.<anonymous> (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\Connection.js:526:10) at Query._callback (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\Connection.js:488:16) at Sequence.end (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\sequences\Sequence.js:83:24) at Query._handleFinalResultPacket (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\sequences\Query.js:149:8) at Query.EofPacket (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\sequences\Query.js:133:8) at Protocol._parsePacket (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\Protocol.js:291:23) at Parser._parsePacket (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\Parser.js:433:10) at Parser.write (C:\Users\admin\Desktop\前端开发\Node.js\day6\code\api_server\node_modules\mysql\lib\protocol\Parser.js:43:10) Node.js v18.12.1
06-08

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值