金融预测

一周算法实践进阶day3:模型融合

import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from mlxtend.classifier import StackingClassifier  # stacking ensemble
from sklearn import model_selection
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Load the raw table; the CSV is read with the GBK codec.
financial = pd.read_csv("data.csv", encoding='gbk')

# preprocess
# Remove the columns that are not used as features, plus the target column.
# NOTE(review): the first four look like identifiers / free text -- confirm.
fi_copy = financial.drop(
    columns=["source", "bank_card_no", "trade_no", "id_name", "status"]
)
fi_copy_num = fi_copy.select_dtypes(include='number')
fi_copy_obj = fi_copy.select_dtypes(exclude='number')
y = financial["status"]

# Hand-encode the non-numeric columns into numeric features.
fi_obj = pd.DataFrame()
fi_obj["reg_preference_for_trad"] = fi_copy_obj["reg_preference_for_trad"].map(
    {'境外': 0, '一线城市': 1, '二线城市': 2, '三线城市': 3}
)
# Parse each timestamp column once and derive year / month / weekday from it.
for time_col in ("latest_query_time", "loans_latest_time"):
    parsed = pd.to_datetime(fi_copy_obj[time_col])
    fi_obj[time_col + "_year"] = parsed.dt.year
    fi_obj[time_col + "_month"] = parsed.dt.month
    fi_obj[time_col + "_weekday"] = parsed.dt.weekday

# `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22;
# `SimpleImputer` is its replacement. Fall back for very old installations.
try:
    from sklearn.impute import SimpleImputer
except ImportError:  # scikit-learn < 0.20
    from sklearn.preprocessing import Imputer as SimpleImputer

# Fill missing values column-wise with the most frequent value.
imputer = SimpleImputer(strategy="most_frequent")
imputer.fit(fi_copy_num)
fi_num = imputer.transform(fi_copy_num)
fi_num = pd.DataFrame(fi_num, columns=fi_copy_num.columns)

imputer = SimpleImputer(strategy="most_frequent")
imputer.fit(fi_obj)
fi_obj1 = imputer.transform(fi_obj)
fi_obj1 = pd.DataFrame(fi_obj1, columns=fi_obj.columns)

# Join both feature groups side by side and hold out 30% for evaluation.
fi_pro = pd.concat([fi_num, fi_obj1], axis=1, sort=False)
fi_std_train, fi_std_test, y_train, y_test = train_test_split(
    fi_pro, y, test_size=0.3, random_state=2019
)

def model_metrics(clf, X_test, y_test):
    """Print hold-out classification metrics for a fitted classifier.

    Args:
        clf: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Feature matrix of the evaluation split.
        y_test: True labels corresponding to ``X_test``.

    Prints accuracy, precision, recall, F1 and ROC AUC. Returns ``None``.
    """
    y_test_pred = clf.predict(X_test)
    # Second probability column; feeds the threshold-free AUC below.
    y_test_prob = clf.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_test_pred)
    print('The accuracy: ', accuracy)
    precision = precision_score(y_test, y_test_pred)
    print('The precision: ', precision)
    recall = recall_score(y_test, y_test_pred)
    print('The recall: ', recall)
    # Use f1_score (not recall_score) so the value printed as F1 really is
    # the harmonic mean of precision and recall.
    f1 = f1_score(y_test, y_test_pred)
    print('The F1 score: ', f1)
    auc = roc_auc_score(y_test, y_test_prob)
    print('The AUC: ', auc)

# structure model
# Three base learners and a logistic-regression meta-learner, all seeded
# with the same random_state.
xgbc = XGBClassifier(random_state=2019)
lgbm = LGBMClassifier(random_state=2019)
rfc = RandomForestClassifier(random_state=2019)
lr = LogisticRegression(random_state=2019)

# Stack the base learners under the meta-learner, fit on the training split
# and report the hold-out metrics.
base_learners = [xgbc, lgbm, rfc]
sc = StackingClassifier(classifiers=base_learners, meta_classifier=lr)
sc.fit(fi_std_train, y_train.values)
model_metrics(sc, fi_std_test, y_test)

# 5-fold cross-validated accuracy on the training split, for each base
# learner and for the stacked model.
labelled_models = (
    ("XGBClassifier", xgbc),
    ("LGBMClassifier", lgbm),
    ("RandomForestClassifier", rfc),
    ("StackingClassifier", sc),
)
for label, ml in labelled_models:
    scores = model_selection.cross_val_score(
        ml, fi_std_train, y_train, cv=5, scoring='accuracy'
    )
    print("Accuracy: %0.2f (+/- %0.2f) [%s]"% (scores.mean(), scores.std(), label))
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值