#-*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Screen the bank-loan predictors with randomized logistic regression, then
# fit a plain logistic regression on the retained predictors and report its
# accuracy on the training data.
filename = 'bankloan.xls'
data = pd.read_excel(filename)

# The first eight columns are used as predictors and the ninth as the target.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
feature_columns = data.columns[:8]
X = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
# NOTE: RandomizedLogisticRegression was deprecated in scikit-learn 0.19 and
# removed in 0.21, so this import requires an older scikit-learn release.
from sklearn.linear_model import RandomizedLogisticRegression as RLR

rlr = RLR(selection_threshold=0.25)  # randomized logistic regression used to screen variables
rlr.fit(X, y)  # train the screening model
chara_scores = rlr.scores_  # per-feature scores in [0, 1]; larger means more important

# Human-readable labels for the eight predictor columns, in column order.
characters = ['age', 'education', 'years of work', 'address', 'income', 'debit radio', 'credit card', 'other debit']
score_table = dict(zip(characters, chara_scores))
print('the scores of the effective characters is %s' % score_table)

# The support mask has one entry per predictor, so it must be applied to the
# predictor columns only; data.columns also contains the target column and is
# therefore one element longer than the mask.
support = rlr.get_support()
selected_columns = feature_columns[support]
print(u'characters: %s' % '.'.join(selected_columns))  # report the retained features

X_fit = data[selected_columns].values
lr = LR()
lr.fit(X_fit, y)
print('accuracy rate: %s' % lr.score(X_fit, y))
# Decision tree
# (Web page residue) Latest recommended article published on 2023-06-16 09:00:00