包含全部示例的代码仓库见GitHub
1 导入库
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
2 绘图格式
# Apply the ggplot style sheet to all subsequent matplotlib figures.
plt.style.use('ggplot')
3 数据准备
# Load the credit dataset; the CSV has no header row, so pandas assigns
# integer column labels 0..15 (column 15 is the class label, coded -1/1).
data = pd.read_csv('./dataset/credit-a.csv', header=None)
# Bare expression — relies on the notebook's auto-display to render the frame.
data
# output
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0 0 30.83 0.000 0 0 9 0 1.25 0 0 1 1 0 202 0.0 -1
1 1 58.67 4.460 0 0 8 1 3.04 0 0 6 1 0 43 560.0 -1
2 1 24.50 0.500 0 0 8 1 1.50 0 1 0 1 0 280 824.0 -1
3 0 27.83 1.540 0 0 9 0 3.75 0 0 5 0 0 100 3.0 -1
4 0 20.17 5.625 0 0 9 0 1.71 0 1 0 1 2 120 0.0 -1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
648 0 21.08 10.085 1 1 11 1 1.25 1 1 0 1 0 260 0.0 1
649 1 22.67 0.750 0 0 0 0 2.00 1 0 2 0 0 200 394.0 1
650 1 25.25 13.500 1 1 13 7 2.00 1 0 1 0 0 200 1.0 1
651 0 17.92 0.205 0 0 12 0 0.04 1 1 0 1 0 280 750.0 1
652 0 35.00 3.375 0 0 0 1 8.29 1 1 0 0 0 0 0.0 1
# Features: every column except the last (positional slice is clearer than
# indexing by the column-label list).
x = data.iloc[:, :-1]
# Labels: remap the raw -1/1 coding in column 15 to 0/1 for classification.
y = data[15].replace(-1, 0)
# Fix random_state so the split — and therefore the accuracy reported
# below — is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)
4 模型构建
# Logistic-regression classifier. The default max_iter=100 is often too few
# for the lbfgs solver on this un-scaled 15-feature dataset and triggers a
# ConvergenceWarning, so give it more iterations to converge cleanly.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)
model.predict(x_test)  # returns the predicted class labels (0 or 1)
# output
array([1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,
0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 0], dtype=int64)
# Returns per-sample class probabilities: one row per sample,
# columns [P(class 0), P(class 1)].
model.predict_proba(x_test) # returns the classification probabilities
# output
array([[6.60615371e-02, 9.33938463e-01],
[8.34478996e-01, 1.65521004e-01],
[1.41088370e-01, 8.58911630e-01],
[1.84079556e-01, 8.15920444e-01],
[9.77099511e-04, 9.99022900e-01],
[9.63819105e-01, 3.61808948e-02],
[8.51060751e-01, 1.48939249e-01],
[6.83732727e-01, 3.16267273e-01],
[9.27358436e-01, 7.26415638e-02],
[9.32467461e-01, 6.75325393e-02],
[5.69194960e-01, 4.30805040e-01],
[1.34069091e-01, 8.65930909e-01],
[9.87069532e-01, 1.29304683e-02],
[1.49668995e-01, 8.50331005e-01],
[1.16908139e-01, 8.83091861e-01],
[2.03066721e-01, 7.96933279e-01],
[8.56061038e-01, 1.43938962e-01],
[9.95836941e-01, 4.16305867e-03],
[2.93070764e-04, 9.99706929e-01],
[1.00000000e+00, 3.30189002e-16],
[9.83297981e-01, 1.67020194e-02],
[1.86251489e-01, 8.13748511e-01],
[5.35926548e-01, 4.64073452e-01],
[7.55667682e-01, 2.44332318e-01],
[6.43859743e-01, 3.56140257e-01],
[2.79667316e-01, 7.20332684e-01],
[9.63765556e-01, 3.62344443e-02],
[2.64071641e-02, 9.73592836e-01],
[4.19146429e-02, 9.58085357e-01],
[1.79191699e-01, 8.20808301e-01],
[1.93460409e-01, 8.06539591e-01],
[9.64036616e-02, 9.03596338e-01],
[1.21332162e-01, 8.78667838e-01],
[2.48143536e-01, 7.51856464e-01],
[2.20133110e-03, 9.97798669e-01],
[4.08915180e-01, 5.91084820e-01],
[7.65807010e-02, 9.23419299e-01],
[1.44802213e-01, 8.55197787e-01],
[9.98846818e-01, 1.15318205e-03],
[1.97074786e-01, 8.02925214e-01],
[2.83473824e-01, 7.16526176e-01],
[9.08388473e-01, 9.16115268e-02],
[8.25108746e-01, 1.74891254e-01],
[8.77065777e-01, 1.22934223e-01],
[7.83165840e-01, 2.16834160e-01],
[8.31404589e-01, 1.68595411e-01],
[1.56427134e-02, 9.84357287e-01],
[8.29387297e-01, 1.70612703e-01],
[1.06507496e-01, 8.93492504e-01],
[9.99995813e-01, 4.18677113e-06],
[2.20942896e-01, 7.79057104e-01],
[6.89460457e-03, 9.93105395e-01],
[3.55591481e-01, 6.44408519e-01],
[6.68914006e-01, 3.31085994e-01],
[4.95366015e-01, 5.04633985e-01],
[8.32531498e-01, 1.67468502e-01],
[1.00000000e+00, 2.44663902e-11],
[3.90869392e-01, 6.09130608e-01],
[4.88321272e-01, 5.11678728e-01],
[9.42062404e-01, 5.79375961e-02],
[1.56880504e-01, 8.43119496e-01],
[8.49206525e-01, 1.50793475e-01],
[1.94623394e-01, 8.05376606e-01],
[8.25040460e-01, 1.74959540e-01],
[8.45659998e-01, 1.54340002e-01],
[1.66965523e-01, 8.33034477e-01],
[9.94559601e-01, 5.44039916e-03],
[1.38836763e-01, 8.61163237e-01],
[3.42777589e-01, 6.57222411e-01],
[7.28376159e-01, 2.71623841e-01],
[1.30228900e-02, 9.86977110e-01],
[1.43409431e-01, 8.56590569e-01],
[2.82580320e-01, 7.17419680e-01],
[1.76323337e-01, 8.23676663e-01],
[5.37400050e-01, 4.62599950e-01],
[1.71094087e-01, 8.28905913e-01],
[4.95141183e-01, 5.04858817e-01],
[3.38614440e-02, 9.66138556e-01],
[8.16965131e-01, 1.83034869e-01],
[1.07113710e-01, 8.92886290e-01],
[1.77590609e-01, 8.22409391e-01],
[1.04104620e-01, 8.95895380e-01],
[6.88476144e-01, 3.11523856e-01],
[4.25696584e-02, 9.57430342e-01],
[7.16955909e-01, 2.83044091e-01],
[1.08485083e-01, 8.91514917e-01],
[6.71159064e-02, 9.32884094e-01],
[2.54821601e-02, 9.74517840e-01],
[9.60269763e-02, 9.03973024e-01],
[5.91442414e-01, 4.08557586e-01],
[1.52634801e-01, 8.47365199e-01],
[9.90429568e-01, 9.57043153e-03],
[6.38957493e-01, 3.61042507e-01],
[8.60182469e-01, 1.39817531e-01],
[6.07033553e-01, 3.92966447e-01],
[8.42531416e-02, 9.15746858e-01],
[4.46158080e-01, 5.53841920e-01],
[7.76886228e-01, 2.23113772e-01],
[8.64070117e-02, 9.13592988e-01],
[1.00736932e-02, 9.89926307e-01],
[1.48243067e-01, 8.51756933e-01],
[1.62073639e-01, 8.37926361e-01],
[5.46812011e-01, 4.53187989e-01],
[8.07066910e-01, 1.92933090e-01],
[1.73334950e-01, 8.26665050e-01],
[5.30791168e-02, 9.46920883e-01],
[3.51361385e-01, 6.48638615e-01],
[1.31771479e-01, 8.68228521e-01],
[6.74049876e-02, 9.32595012e-01],
[9.16807700e-01, 8.31923005e-02],
[9.99999104e-01, 8.95557855e-07],
[9.66401604e-01, 3.35983962e-02],
[4.82663509e-01, 5.17336491e-01],
[1.57841163e-02, 9.84215884e-01],
[9.41811072e-01, 5.81889280e-02],
[9.82444528e-01, 1.75554718e-02],
[6.46206097e-02, 9.35379390e-01],
[1.06196025e-01, 8.93803975e-01],
[6.13190015e-01, 3.86809985e-01],
[8.62913171e-01, 1.37086829e-01],
[9.72545008e-01, 2.74549917e-02],
[9.88004128e-01, 1.19958718e-02],
[9.99465727e-01, 5.34273145e-04],
[1.16548591e-01, 8.83451409e-01],
[2.51884231e-01, 7.48115769e-01],
[2.26399929e-01, 7.73600071e-01],
[4.83522417e-01, 5.16477583e-01],
[8.37031421e-01, 1.62968579e-01],
[6.13359088e-03, 9.93866409e-01],
[2.71283831e-01, 7.28716169e-01],
[9.53814562e-01, 4.61854379e-02],
[9.52414282e-01, 4.75857182e-02],
[8.43110707e-01, 1.56889293e-01],
[1.35242228e-01, 8.64757772e-01],
[3.36992363e-01, 6.63007637e-01],
[8.18726543e-01, 1.81273457e-01],
[2.29354989e-01, 7.70645011e-01],
[1.88943642e-01, 8.11056358e-01],
[1.46145773e-01, 8.53854227e-01],
[2.30817213e-01, 7.69182787e-01],
[8.66472491e-02, 9.13352751e-01],
[8.12998267e-01, 1.87001733e-01],
[8.03897904e-01, 1.96102096e-01],
[8.30199720e-02, 9.16980028e-01],
[9.00751049e-01, 9.92489506e-02],
[1.06650273e-01, 8.93349727e-01],
[7.65257470e-02, 9.23474253e-01],
[9.57502919e-01, 4.24970810e-02],
[7.60105651e-01, 2.39894349e-01],
[2.66443940e-01, 7.33556060e-01],
[7.94193465e-01, 2.05806535e-01],
[4.82328179e-01, 5.17671821e-01],
[1.47014979e-01, 8.52985021e-01],
[1.94260076e-01, 8.05739924e-01],
[1.38559212e-01, 8.61440788e-01],
[9.82125609e-02, 9.01787439e-01],
[2.92974865e-02, 9.70702514e-01],
[2.10696581e-01, 7.89303419e-01],
[6.69251981e-01, 3.30748019e-01],
[2.79784723e-01, 7.20215277e-01],
[1.99942546e-01, 8.00057454e-01],
[1.95397982e-01, 8.04602018e-01],
[1.26122756e-03, 9.98738772e-01],
[8.68284726e-01, 1.31715274e-01]])
5 模型评估
# accuracy_score's signature is (y_true, y_pred). Accuracy is symmetric so
# the original swapped order gave the same number, but pass ground truth
# first per the sklearn API — asymmetric metrics (precision, recall) would
# silently be wrong otherwise.
accuracy_score(y_test, model.predict(x_test))
# output
0.8475609756097561