Machine Learning (Andrew Ng) ex2: Logistic Regression

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

data=pd.read_csv('ex2data1.txt',header=None,names=['Exam1','Exam2','Admitted'])
data.head()
   Exam1      Exam2      Admitted
0  34.623660  78.024693  0
1  30.286711  43.894998  0
2  35.847409  72.902198  0
3  60.182599  86.308552  1
4  79.032736  75.344376  1
positive=data[data['Admitted'].isin([1])]  # select the rows where Admitted is 1 (isin is checked below)
negative=data[data['Admitted'].isin([0])]
data['Admitted'].isin([1]).head()
0    False
1    False
2    False
3     True
4     True
Name: Admitted, dtype: bool
positive.head()
   Exam1      Exam2      Admitted
3  60.182599  86.308552  1
4  79.032736  75.344376  1
6  61.106665  96.511426  1
7  75.024746  46.554014  1
8  76.098787  87.420570  1
sns.set(context='notebook',style='darkgrid') 
sns.lmplot(x='Exam1', y='Exam2', hue='Admitted', data=data, fit_reg=False)  # scatter plot without a regression line
<seaborn.axisgrid.FacetGrid at 0x1d1d7c26e10>

[Figure: scatter plot of Exam1 vs Exam2, colored by Admitted]

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def get_X(df):
    df = df.copy()
    df.insert(0, 'Ones', 1)        # add the intercept column
    return df.iloc[:, :-1].values  # .as_matrix() is deprecated; use .values

def get_y(df):
    return np.array(df.iloc[:, -1])  # the last column is the label

def normalize_feature(df):
    return (df - df.mean()) / df.std()
theta=np.zeros(3)
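
As a quick sanity check, sigmoid maps 0 to 0.5 and saturates towards 0 and 1 for large negative and positive inputs:

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10, 0, 10])))   # approximately [0.0000454, 0.5, 0.9999546]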

Logistic regression cost function

\[J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right)\log\left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]\]

def cost(theta, X, y):  # @ is matrix multiplication
    return np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1 - sigmoid(X @ theta)))

X=get_X(data)
y=get_y(data)
cost(theta,X,y)
0.6931471805599453
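
This matches the expected value: with θ = 0 every prediction is 0.5, so the cost is −log(0.5) = ln 2 ≈ 0.6931.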

Gradient descent

  • This is batch gradient descent.
  • Vectorized form: \[\frac{1}{m}X^{T}\left(\operatorname{sigmoid}(X\theta)-y\right)\]
  • Componentwise: \[\frac{\partial J(\theta)}{\partial \theta_{j}}=\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)x_{j}^{(i)}\]
def gradient_decent(theta, x, y):
    # gradient of the cost over the whole training batch (just the gradient, no update step)
    return (1 / len(x)) * ((sigmoid(x @ theta) - y).T @ x)
gradient_decent(theta,X,y)
array([ -0.1       , -12.00921659, -11.26284221])
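
A quick check of the first component: at θ = 0 the hypothesis is 0.5 for every example, so the intercept component of the gradient is 0.5 − mean(y); the value −0.1 says that 60% of the students in this training set were admitted.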
import scipy.optimize as opt
res=opt.minimize(fun=cost,x0=theta,args=(X,y),method='Newton-CG', jac=gradient_decent)
print(res)
     fun: 0.20349770628264552
     jac: array([-5.89271913e-06, -4.23590443e-04, -5.16057328e-04])
 message: 'Optimization terminated successfully.'
    nfev: 71
    nhev: 0
     nit: 28
    njev: 242
  status: 0
 success: True
       x: array([-25.15571627,   0.20618678,   0.20142615])
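
With the fitted θ (res.x) the model can score an individual applicant. As a small sketch, an applicant with exam scores of 45 and 85 (just an illustrative input):

prob = sigmoid(np.array([1, 45, 85]) @ res.x)  # prepend 1 for the intercept term
print(prob)  # roughly 0.776 with the theta found above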

Evaluating predictions on the training set

def predict(x, theta):
    h = sigmoid(x @ theta)
    return (h >= 0.5).astype(int)  # classify as 1 when the predicted probability is at least 0.5
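
Applying predict to the training set gives a quick check of the fit (a minimal sketch reusing X, y and res from above):

y_pred = predict(X, res.x)
print(classification_report(y, y_pred))
print('train accuracy:', (y_pred == y).mean())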

Finding the decision boundary

The decision boundary is where the model is exactly indifferent between the two classes:

\[h_{\theta}(x)=\frac{1}{1+e^{-\theta^{T}x}}=\frac{1}{2}\quad\Longleftrightarrow\quad\theta^{T}x=0\]

With two features this is the straight line

\[\theta_{0}+\theta_{1}X_{1}+\theta_{2}X_{2}=0\quad\Rightarrow\quad X_{2}=-\frac{\theta_{0}+\theta_{1}X_{1}}{\theta_{2}}\]

(note the minus sign, which is why the code below negates the fitted coefficients).

coef = -(res.x / res.x[2])  # rescale so that X2 = coef[0] + coef[1] * X1
x = np.arange(130, step=0.1)
y = coef[0] + coef[1]*x
sns.set(context='notebook', style='darkgrid')
sns.lmplot(x='Exam1', y='Exam2', hue='Admitted', data=data, fit_reg=False)
plt.plot(x, y, 'r')  # overlay the boundary line
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()

[Figure: the fitted linear decision boundary over the Exam1/Exam2 scatter plot]

Regularized logistic regression (to deal with overfitting)

data2=pd.read_csv('ex2data2.txt',names=['test1','test2','accepted'])
data2.head()
   test1      test2    accepted
0  0.051267   0.69956  1
1 -0.092742   0.68494  1
2 -0.213710   0.69225  1
3 -0.375000   0.50219  1
4 -0.513250   0.46564  1
sns.set(context='notebook',style='darkgrid')
sns.lmplot(x='test1', y='test2', hue='accepted', data=data2, fit_reg=False)
<seaborn.axisgrid.FacetGrid at 0x1d1dad38588>

[Figure: scatter plot of test1 vs test2, colored by accepted]


Feature mapping (constructing polynomial features)

def feature_mapping(x1, x2, power):
    # feature mapping: build every polynomial term x1^(i-j) * x2^j for 0 <= j <= i <= power
    data = pd.DataFrame()
    for i in range(power + 1):      # range is half-open, so i runs over 0..power
        for j in range(i + 1):
            data['F' + str(j) + str(i)] = np.power(x1, i - j) * np.power(x2, j)
    return data
x1=np.array(data2.test1)
x2=np.array(data2.test2)
data_3=feature_mapping(x1,x2,6)
data_3.head()
   F00       F01      F11       F02  ...       F56       F66
0  1.0  0.051267  0.69956  0.002628  ...  0.008589  0.117206
1  1.0 -0.092742  0.68494  0.008601  ... -0.013981  0.103256
2  1.0 -0.213710  0.69225  0.045672  ... -0.033973  0.110047
3  1.0 -0.375000  0.50219  0.140625  ... -0.011978  0.016040
4  1.0 -0.513250  0.46564  0.263426  ... -0.011235  0.010193

5 rows × 28 columns
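
For power = 6 the mapping produces (6+1)(6+2)/2 = 28 polynomial terms (every monomial x1^a · x2^b with a + b ≤ 6), which is where the 28 columns come from.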

Regularized cost function

\[J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right)\log\left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]+\frac{\lambda}{2m}\sum_{j=1}^{n}\theta_{j}^{2}\]

theta=np.zeros(data_3.shape[1])
X=feature_mapping(x1,x2,power=6)
print(X.shape)
y=get_y(data2)
print(y.shape)
print(theta.shape)
(118, 28)
(118,)
(28,)
def regularized_cost(theta, x, y, l=1):
    theta_new = theta[1:]  # the intercept theta_0 is not penalized
    regularized_term = (l / (2 * len(x))) * np.sum(np.power(theta_new, 2))
    return cost(theta, x, y) + regularized_term
regularized_cost(theta,X,y,l=1)
0.6931471805599454
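
A quick consistency check (a minimal sketch): with l=0 the penalty vanishes and regularized_cost must agree with the plain cost, and at θ = 0 the penalty is zero anyway, so both give ln 2 ≈ 0.693 again:

print(regularized_cost(theta, X, y, l=0))  # identical to the unregularized cost
print(cost(theta, X, y))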

Regularized gradient

\[\frac{\partial J(\theta)}{\partial \theta_{j}}=\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)x_{j}^{(i)}+\frac{\lambda}{m}\theta_{j}\quad\text{for } j\ge 1\]

def regularized_gradient(theta, X, y, l=1):
    theta_new = theta[1:]  # skip the intercept when building the penalty term
    regularized_term = np.concatenate([np.array([0]), (l / len(X)) * theta_new])
    return np.array(gradient_decent(theta, X, y) + regularized_term)
regularized_gradient(theta, X, y)
array([8.47457627e-03, 1.87880932e-02, 7.77711864e-05, 5.03446395e-02,
       1.15013308e-02, 3.76648474e-02, 1.83559872e-02, 7.32393391e-03,
       8.19244468e-03, 2.34764889e-02, 3.93486234e-02, 2.23923907e-03,
       1.28600503e-02, 3.09593720e-03, 3.93028171e-02, 1.99707467e-02,
       4.32983232e-03, 3.38643902e-03, 5.83822078e-03, 4.47629067e-03,
       3.10079849e-02, 3.10312442e-02, 1.09740238e-03, 6.31570797e-03,
       4.08503006e-04, 7.26504316e-03, 1.37646175e-03, 3.87936363e-02])
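
As an optional check (a minimal sketch, not part of the original exercise), the analytic gradient can be compared against a central finite-difference approximation at a random θ; the two should agree to many decimal places:

def numerical_gradient(f, theta, eps=1e-5):
    # central-difference approximation of the gradient of f at theta
    grad = np.zeros_like(theta)
    for i in range(len(theta)):
        e = np.zeros_like(theta)
        e[i] = eps
        grad[i] = (f(theta + e) - f(theta - e)) / (2 * eps)
    return grad

t = np.random.randn(X.shape[1]) * 0.1
num_grad = numerical_gradient(lambda t_: regularized_cost(t_, X, y, l=1), t)
ana_grad = regularized_gradient(t, X, y, l=1)
print(np.max(np.abs(num_grad - ana_grad)))  # should be tiny, on the order of 1e-9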
import scipy.optimize as opt
print('init cost={}'.format(regularized_cost(theta,X,y)))
res =opt.minimize(fun=regularized_cost,x0=theta,args=(X,y),method='Newton-CG',jac=regularized_gradient)
res
init cost=0.6931471805599454





     fun: 0.5290027297129302
     jac: array([ 4.52421907e-08, -4.22499842e-09, -1.08282314e-08, -2.83522806e-08,
        1.82082428e-08, -5.66533954e-08, -4.95369310e-09, -2.16518576e-08,
        5.33617573e-09, -7.14897884e-09, -2.90197498e-08,  2.87231263e-09,
       -3.76049930e-09,  8.31922510e-10, -2.38721746e-08, -7.87126082e-09,
       -7.91557401e-09,  6.58521091e-10, -3.91583210e-09,  2.11321802e-09,
       -2.24182045e-09,  1.16699260e-09,  7.98161960e-10, -5.02280356e-09,
        2.78008348e-09,  5.86976469e-10,  6.29971969e-09, -2.17798713e-09])
 message: 'Optimization terminated successfully.'
    nfev: 7
    nhev: 0
     nit: 6
    njev: 68
  status: 0
 success: True
       x: array([ 1.27274165,  0.62527222,  1.18108934, -2.01996318, -0.91742302,
       -1.43166909,  0.12400628, -0.36553617, -0.35723928, -0.17513015,
       -1.4581588 , -0.05098913, -0.61555469, -0.27470694, -1.19281753,
       -0.24218865, -0.2060067 , -0.04473068, -0.2777846 , -0.29537814,
       -0.45635668, -1.0432015 ,  0.02777166, -0.29243155,  0.01556695,
       -0.32737909, -0.14388666, -0.92465139])
final_theta = res.x
y_pred = predict(X, final_theta)

print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.90      0.75      0.82        60
           1       0.78      0.91      0.84        58

    accuracy                           0.83       118
   macro avg       0.84      0.83      0.83       118
weighted avg       0.84      0.83      0.83       118
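
To see how the regularization strength trades training fit for smoothness, a small sketch (reusing the feature-mapped X and y from above) refits the model for a few values of λ; training accuracy generally drops as λ grows:

for l in (0, 1, 10, 100):
    r = opt.minimize(fun=regularized_cost, x0=np.zeros(X.shape[1]),
                     args=(X, y, l), method='Newton-CG', jac=regularized_gradient)
    acc = (predict(X, r.x) == y).mean()
    print('lambda = {:>3}: train accuracy = {:.3f}'.format(l, acc))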

Visualizing the decision boundary

def feature_mapped_logistic_regression(power, l):
    # refit the regularized model from scratch for a given polynomial power and lambda
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    x1 = np.array(df.test1)
    x2 = np.array(df.test2)
    y = get_y(df)
    X = feature_mapping(x1, x2, power)
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient)
    return res.x
    
def find_decision_boundary(density, power, theta, threshold):
    # evaluate X @ theta on a dense grid and keep the points where it is close to zero
    t1 = np.linspace(-1, 1.5, density)
    t2 = np.linspace(-1, 1.5, density)
    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    mapped_cord = feature_mapping(x_cord, y_cord, power)  # map the grid points to polynomial features
    inner_product = mapped_cord.values @ theta.T          # .as_matrix() is deprecated; use .values
    decision = mapped_cord[np.abs(inner_product) < threshold]
    return decision['F01'], decision['F11']               # F01 = x1, F11 = x2
    
def draw_boundary(power, l):
    density = 1000
    threshold = 2 * 10**-3  # grid points with |X @ theta| below this tolerance are treated as on the boundary
    final_theta = feature_mapped_logistic_regression(power, l)
    x, y = find_decision_boundary(density, power, final_theta, threshold)
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    sns.lmplot(x='test1', y='test2', hue='accepted', data=df, size=6, fit_reg=False)
    plt.scatter(x, y, c='r', s=10)  # overlay the boundary points
    plt.title('Decision boundary')
    plt.show()
draw_boundary(power=6, l=1)

[Figure: decision boundary with power=6, λ=1]

draw_boundary(power=6, l=100)

[Figure: decision boundary with power=6, λ=100]

