XGBoost 案例学习代码

import os
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
#%matplotlib inline

import xgboost as xgb
# Load the loan statistics export. ``skiprows=1`` skips the first line of the
# file, so the header is read from the second line.
df = pd.read_csv('./data/LoanStats3a_2.csv', low_memory=False, skiprows=1)

# Notebook-style inspection cells: these expressions have no effect when the
# file is run as a script (their values are not printed or stored).
df.isnull().any()  # which columns contain nulls
df.head()
df.shape

# Keep only the columns at positions 2..110, then drop positions 45..71 of what
# remains. Per the original author's notes these are (mostly) empty columns.
# NOTE(review): the positions are hard-coded for this particular CSV layout.
df = df.iloc[:, 2:111]
empty_cols = list(range(45, 72))
df = df.drop(df.columns[empty_cols], axis=1)
df.shape

# Keep only loans whose status is one of the two labels used for training.
# ``.copy()`` makes the filtered frame independent of the original so the
# column assignment below does not write into a slice (which triggers pandas'
# SettingWithCopyWarning).
df = df[df['loan_status'].isin(['Fully Paid', 'Charged Off'])].copy()

# Binary target: 0 = "Fully Paid", 1 = "Charged Off".
df['loan_status'] = df['loan_status'].map({'Fully Paid': 0, 'Charged Off': 1})

# Drop every column that still contains at least one missing value.
df = df.dropna(axis=1)
df
def _one_hot(frame, column):
    """Return indicator (0/1) columns for ``frame[column]``.

    Values are coerced to ``str`` first so every encoded column is handled the
    same way; the resulting columns are named ``'<column>: <value>'``.
    """
    return frame[column].apply(str).str.get_dummies().add_prefix(column + ': ')


# One-hot encode the categorical columns.
df_grade = _one_hot(df, 'grade')
df_subgrad = _one_hot(df, 'sub_grade')
df_home = _one_hot(df, 'home_ownership')
df_addr = _one_hot(df, 'addr_state')
df_term = _one_hot(df, 'term')

# Append the indicator columns to the frame.
df = pd.concat([df, df_grade, df_subgrad, df_home, df_addr, df_term], axis=1)

# Drop the source columns that were just encoded, together with further
# columns that are removed before the feature matrix is built.
df = df.drop(
    [
        'grade',
        'sub_grade',
        'home_ownership',
        'addr_state',
        'int_rate',
        'term',
        'zip_code',
        'purpose',
        'initial_list_status',  # was listed twice in the original drop list
        'pymnt_plan',
        'issue_d',
        'earliest_cr_line',
        'verification_status',
    ],
    axis=1,
)

# Notebook-style inspection cells; their values are discarded in a script.
df.dtypes
df.corr()
# Prepare the data: ``loan_status`` is the label, every other column is a feature.
y = df['loan_status']
X = df.drop(columns='loan_status')
print(X.shape, y.shape)

# Hold out 20% of the rows for evaluation; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

# Gradient-boosted binary classifier with the hyper-parameters given here.
xg_classifier = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=10,
    max_depth=5,
    learning_rate=0.1,
    colsample_bytree=0.3,
    alpha=10,
)
xg_classifier.fit(X_train, y_train)

# Score on the held-out split. The value is neither stored nor printed, so it
# is only visible when run as the last expression of a notebook cell.
xg_classifier.score(X_test, y_test)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值