Logistic Regression

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')  # nicer default plot style
from sklearn.metrics import classification_report  # evaluation report

Prepare the data

data = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])
data.head()  # look at the first five rows
       exam1      exam2  admitted
0  34.623660  78.024693         0
1  30.286711  43.894998         0
2  35.847409  72.902198         0
3  60.182599  86.308552         1
4  79.032736  75.344376         1
data.describe()
            exam1       exam2    admitted
count  100.000000  100.000000  100.000000
mean    65.644274   66.221998    0.600000
std     19.458222   18.582783    0.492366
min     30.058822   30.603263    0.000000
25%     50.919511   48.179205    0.000000
50%     67.032988   67.682381    1.000000
75%     80.212529   79.360605    1.000000
max     99.827858   98.869436    1.000000
sns.set(context="notebook", style="darkgrid", palette=sns.color_palette("RdBu", 2))

sns.lmplot(x='exam1', y='exam2', hue='admitted', data=data, 
           height=6, 
           fit_reg=False, 
           scatter_kws={"s": 50}
          )
plt.show()  # take a look at the data

def get_X(df):  # read the features
    """
    Use concat to add an intercept column without mutating the input;
    not efficient for big datasets, though.
    """
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m x 1 dataframe
    data = pd.concat([ones, df], axis=1)  # merge the data column-wise
    return data.iloc[:, :-1].values  # this returns an ndarray, not a matrix


def get_y(df):  # read the labels
    '''assume the last column is the target'''
    return np.array(df.iloc[:, -1])  # df.iloc[:, -1] is the last column of df


def normalize_feature(df):
    """Applies function along input axis (default 0) of DataFrame."""
    return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling
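normalize_feature is not actually needed for this dataset, but here is a quick illustration of what it does (my addition, not part of the original exercise):

# After z-score scaling, each column should have mean ~0 and std 1.
scaled = normalize_feature(data[['exam1', 'exam2']])
print(scaled.describe().loc[['mean', 'std']])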
X = get_X(data)
print(X.shape)

y = get_y(data)
print(y.shape)
(100, 3)
(100,)
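A quick peek (my addition) confirms that get_X prepended the intercept column of ones:

print(X[:2])  # each row starts with the 1.0 intercept feature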

The sigmoid function

g denotes the commonly used logistic function, an S-shaped (sigmoid) function:

$$g(z) = \frac{1}{1 + e^{-z}}$$

Combining it with the linear model, we get the hypothesis of the logistic regression model:

$$h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}}$$

def sigmoid(z):
    # your code here  (approx ~ 1 line)
    gz = 1 / (1 + np.exp(-z))
    return gz

The code below calls the function you just wrote and plots the sigmoid. If your implementation is correct, you should see the S-shaped curve below.

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(np.arange(-10, 10, step=0.01),
        sigmoid(np.arange(-10, 10, step=0.01)))
ax.set_ylim((-0.1,1.1))
ax.set_xlabel('z', fontsize=18)
ax.set_ylabel('g(z)', fontsize=18)
ax.set_title('sigmoid function', fontsize=18)
plt.show()

Cost function

  • $\max(\ell(\theta)) = \min(-\ell(\theta))$
  • choose $-\ell(\theta)$ as the cost function

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\left(h_\theta(x^{(i)})\right) + \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_\theta(x^{(i)})\right) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right]$$

theta = np.zeros(3)  # X is (m, n), so theta has n entries
theta
array([0., 0., 0.])
def cost(theta, X, y):
    '''cost fn is -l(theta) for you to minimize'''
    # your code here  (approx ~ 2 lines)
    h = sigmoid(X @ theta)  # h_theta(x) for every example, shape (m,)
    m = len(y)
    return (np.dot(-y, np.log(h)) - np.dot(1 - y, np.log(1 - h))) / m
# Hint: X @ theta is equivalent to X.dot(theta)
cost(theta, X, y)
0.6931471805599453

Gradient descent

  • This is batch gradient descent.
  • Vectorized form: $\frac{1}{m} X^T\left(\mathrm{sigmoid}(X\theta) - y\right)$

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$
def gradient(theta, X, y):
    # your code here  (approx ~ 2 lines)
    h = sigmoid(X @ theta)  # predictions, shape (m,)
    m = len(y)
    return X.T @ (h - y) / m
gradient(theta, X, y)
array([ -0.1       , -12.00921659, -11.26284221])
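As an optional sanity check (my addition, not part of the exercise), the analytic gradient can be compared against a centered finite-difference approximation of the cost:

def numerical_gradient(theta, X, y, eps=1e-6):
    # approximate dJ/dtheta_j with (J(theta + e_j*eps) - J(theta - e_j*eps)) / (2*eps)
    grads = np.zeros_like(theta)
    for j in range(len(theta)):
        step = np.zeros_like(theta)
        step[j] = eps
        grads[j] = (cost(theta + step, X, y) - cost(theta - step, X, y)) / (2 * eps)
    return grads

print(np.allclose(gradient(theta, X, y), numerical_gradient(theta, X, y)))  # expect True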

Fit the parameters

import scipy.optimize as opt
res = opt.minimize(fun=cost, x0=theta, args=(X, y), method='Newton-CG', jac=gradient)
print(res)
     fun: 0.20355713617495905
     jac: array([ 0.00014506, -0.00324422, -0.00261146])
 message: 'Optimization terminated successfully.'
    nfev: 63
    nhev: 0
     nit: 25
    njev: 160
  status: 0
 success: True
       x: array([-24.53591642,   0.20123168,   0.19640851])
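If scikit-learn is available, a hedged cross-check (my addition): LogisticRegression with regularization effectively switched off (large C) should land near the same parameters. Note that sklearn fits its own intercept, so we pass the raw two features without the ones column:

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(C=1e6)  # large C ~ almost no regularization
clf.fit(data[['exam1', 'exam2']], data['admitted'])
print(clf.intercept_, clf.coef_)  # should be close to res.x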

Predict and validate on the training set

def predict(X, theta):
    # your code here  (approx ~ 2 lines)
    y_pred = sigmoid(X @ theta)           # predicted probabilities
    return (y_pred >= 0.5).astype(int)    # threshold at 0.5
final_theta = res.x
y_pred = predict(X, final_theta)

print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.87      0.85      0.86        40
           1       0.90      0.92      0.91        60

    accuracy                           0.89       100
   macro avg       0.89      0.88      0.88       100
weighted avg       0.89      0.89      0.89       100

Finding the decision boundary

http://stats.stackexchange.com/questions/93569/why-is-logistic-regression-a-linear-classifier

$X\theta = 0$ (this is the boundary line; with the intercept feature, it solves to $x_2 = -(\theta_0 + \theta_1 x_1)/\theta_2$, which is what `coef` computes below)

print(res.x)  # this is the final theta
[-24.53591642   0.20123168   0.19640851]
coef = -(res.x / res.x[2])  # find the equation of the line
print(coef)
[124.92287633  -1.02455683  -1.        ]

x = np.arange(130, step=0.1)
y = coef[0] + coef[1] * x
data.describe()  # find the range of x and y
            exam1       exam2    admitted
count  100.000000  100.000000  100.000000
mean    65.644274   66.221998    0.600000
std     19.458222   18.582783    0.492366
min     30.058822   30.603263    0.000000
25%     50.919511   48.179205    0.000000
50%     67.032988   67.682381    1.000000
75%     80.212529   79.360605    1.000000
max     99.827858   98.869436    1.000000
sns.set(context="notebook", style="ticks", font_scale=1.5)

sns.lmplot(x='exam1', y='exam2', hue='admitted', data=data, 
           height=6, 
           fit_reg=False, 
           scatter_kws={"s": 25}
          )

plt.plot(x, y, 'grey')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()


Regularized logistic regression

df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
df.head()
      test1    test2  accepted
0  0.051267  0.69956         1
1 -0.092742  0.68494         1
2 -0.213710  0.69225         1
3 -0.375000  0.50219         1
4 -0.513250  0.46564         1
sns.set(context="notebook", style="ticks", font_scale=1.5)

sns.lmplot(x='test1', y='test2', hue='accepted', data=df, 
           height=6, 
           fit_reg=False, 
           scatter_kws={"s": 50}
          )

plt.title('Regularized Logistic Regression')
plt.show()


Feature mapping

Polynomial expansion:

for i in 0..power:
    for p in 0..i:
        output x^(i-p) * y^p
def feature_mapping(x, y, power, as_ndarray=False):
    """Return mapped features as ndarray or dataframe."""
    data = {"f{}{}".format(i - p, p): np.power(x, i - p) * np.power(y, p)
            for i in np.arange(power + 1)
            for p in np.arange(i + 1)
            }

    if as_ndarray:
        return pd.DataFrame(data).values
    else:
        return pd.DataFrame(data)

x1 = np.array(df.test1)
x2 = np.array(df.test2)
data = feature_mapping(x1, x2, power=6)
print(data.shape)
data.head()
(118, 28)
[data.head() output: 5 rows × 28 columns — the mapped features f00, f10, f01, f20, …, f06, where fab = test1^a · test2^b]

data.describe()
[data.describe() output: 8 rows × 28 columns of summary statistics for the mapped features; f00 is the constant 1]

Regularized cost

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_\theta(x^{(i)})\right) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

theta = np.zeros(data.shape[1])
X = feature_mapping(x1, x2, power=6, as_ndarray=True)
print(X.shape)

y = get_y(df)
print(y.shape)
(118, 28)
(118,)
def regularized_cost(theta, X, y, l=1):
    # your code here  (approx ~ 3 lines)
    m = len(y)
    # the intercept term theta[0] is not regularized
    regu_term = l / (2 * m) * np.sum(np.power(theta[1:], 2))
    return cost(theta, X, y) + regu_term

regularized_cost(theta, X, y, l=1)
0.6931471805599454

Because theta is initialized to zeros, the regularized cost should equal the unregularized cost here.

Regularized gradient

$$\frac{\partial J(\theta)}{\partial \theta_j} = \left(\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}\right) + \frac{\lambda}{m}\theta_j \quad \text{for } j \ge 1$$

def regularized_gradient(theta, X, y, l=1):
    # your code here  (approx ~ 3 lines)
    m = len(y)
    regularized_term = l / m * theta
    regularized_term[0] = 0  # do not regularize the intercept term
    return gradient(theta, X, y) + regularized_term
regularized_gradient(theta, X, y)
array([8.47457627e-03, 1.87880932e-02, 7.77711864e-05, 5.03446395e-02,
       1.15013308e-02, 3.76648474e-02, 1.83559872e-02, 7.32393391e-03,
       8.19244468e-03, 2.34764889e-02, 3.93486234e-02, 2.23923907e-03,
       1.28600503e-02, 3.09593720e-03, 3.93028171e-02, 1.99707467e-02,
       4.32983232e-03, 3.38643902e-03, 5.83822078e-03, 4.47629067e-03,
       3.10079849e-02, 3.10312442e-02, 1.09740238e-03, 6.31570797e-03,
       4.08503006e-04, 7.26504316e-03, 1.37646175e-03, 3.87936363e-02])

Fit the parameters

import scipy.optimize as opt
print('init cost = {}'.format(regularized_cost(theta, X, y)))

res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y), method='Newton-CG', jac=regularized_gradient)
res
init cost = 0.6931471805599454
     fun: 0.5351602503809838
     jac: array([-4.40953967e-08,  4.80421622e-09, -2.93966980e-09, -8.45363795e-09,
        1.70329722e-08,  3.38119611e-08, -2.76831472e-09, -8.60782540e-09,
        1.46305718e-09,  7.94770647e-09, -1.06087740e-08,  7.86887349e-10,
       -6.42298185e-09,  7.96794764e-09,  2.02837825e-08, -5.31013168e-10,
       -6.81811705e-09,  1.97820237e-09, -5.09429939e-09, -3.89325855e-11,
        6.80254916e-09, -6.95320736e-09, -7.24708972e-10, -1.04093445e-09,
        1.35729125e-09, -2.03967601e-09,  1.65029609e-09,  9.68433032e-09])
 message: 'Optimization terminated successfully.'
    nfev: 7
    nhev: 0
     nit: 6
    njev: 53
  status: 0
 success: True
       x: array([ 1.14213418,  0.60132066,  1.16718272, -1.87174547, -0.91573325,
       -1.26952692,  0.12668314, -0.36873143, -0.34518464, -0.17377595,
       -1.42386031, -0.04855655, -0.6064203 , -0.26931816, -1.16315916,
       -0.24310139, -0.20707435, -0.04318408, -0.28028167, -0.28695652,
       -0.46910528, -1.03618073,  0.02923477, -0.29262232,  0.0173672 ,
       -0.32897267, -0.13795895, -0.93199012])

Predict

final_theta = res.x
y_pred = predict(X, final_theta)

print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.88      0.75      0.81        60
           1       0.78      0.90      0.83        58

    accuracy                           0.82       118
   macro avg       0.83      0.82      0.82       118
weighted avg       0.83      0.82      0.82       118

Using different values of $\lambda$ (a constant)
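As a quick sketch of the effect (my addition, reusing the functions defined above; X and y are the mapped features and labels):

# Compare training accuracy across a few lambda values.
for l in (0.01, 1, 100):
    r = opt.minimize(fun=regularized_cost, x0=np.zeros(X.shape[1]),
                     args=(X, y, l), method='Newton-CG', jac=regularized_gradient)
    acc = (predict(X, r.x) == y).mean()
    print('lambda = {:>6}: train accuracy = {:.2f}'.format(l, acc))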

Draw the decision boundary

  • We look for all x satisfying $X\theta = 0$.
  • Instead of solving the polynomial equation, just create a coordinate grid of (x, y) that is dense enough, find all points where $X\theta$ is close enough to 0, and plot them.
def draw_boundary(power, l):
    """
    power: polynomial power for the mapped features
    l: lambda constant
    """
    density = 1000
    threshold = 2 * 10**-3

    final_theta = feature_mapped_logistic_regression(power, l)
    x, y = find_decision_boundary(density, power, final_theta, threshold)

    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    sns.lmplot(x='test1', y='test2', hue='accepted', data=df, height=6, fit_reg=False, scatter_kws={"s": 100})

    plt.scatter(x, y, c='r', s=10)
    plt.title('Decision boundary')
    plt.show()
def feature_mapped_logistic_regression(power, l):
    """For drawing purposes only; not a well-generalized logistic regression.
    power: int
        raise x1, x2 to polynomial powers
    l: int
        lambda constant for the regularization term
    """
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    x1 = np.array(df.test1)
    x2 = np.array(df.test2)
    y = get_y(df)

    X = feature_mapping(x1, x2, power, as_ndarray=True)
    theta = np.zeros(X.shape[1])

    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient)
    return res.x
def find_decision_boundary(density, power, theta, threshold):
    """Scan a dense grid and keep the points where X @ theta is near 0."""
    t1 = np.linspace(-1, 1.5, density)
    t2 = np.linspace(-1, 1.5, density)

    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    mapped_cord = feature_mapping(x_cord, y_cord, power)  # this is a dataframe

    inner_product = mapped_cord.values @ theta

    decision = mapped_cord[np.abs(inner_product) < threshold]
    return decision.f10, decision.f01  # f10 is x, f01 is y
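An alternative sketch (my addition, not the author's method): evaluate $X\theta$ on a mesh and let matplotlib trace the zero level set with plt.contour, which avoids tuning a threshold by hand:

def draw_boundary_contour(power, l):
    # fit, then evaluate X @ theta over a grid covering the data range
    final_theta = feature_mapped_logistic_regression(power, l)
    t = np.linspace(-1, 1.5, 200)
    xx, yy = np.meshgrid(t, t)
    z = (feature_mapping(xx.ravel(), yy.ravel(), power).values @ final_theta).reshape(xx.shape)
    plt.contour(xx, yy, z, levels=[0], colors='grey')  # zero level set = decision boundary

Call it after the lmplot scatter, in place of the plt.scatter(x, y, ...) step above.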

Change the value of $\lambda$ and see the effect (optional)

draw_boundary(power=6, l=1)     # lambda = 1

draw_boundary(power=6, l=0.01)  # lambda = 0.01 (overfitting)

draw_boundary(power=6, l=100)   # lambda = 100 (underfitting)


Source code on GitHub
