import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
data=pd.read_csv('ex2data1.txt',header=None,names=['Exam1','Exam2','Admitted'])
data.head()
| | Exam1 | Exam2 | Admitted |
|---|---|---|---|
0 | 34.623660 | 78.024693 | 0 |
1 | 30.286711 | 43.894998 | 0 |
2 | 35.847409 | 72.902198 | 0 |
3 | 60.182599 | 86.308552 | 1 |
4 | 79.032736 | 75.344376 | 1 |
positive=data[data['Admitted'].isin([1])]  # isin returns a boolean mask selecting rows where Admitted == 1
negative=data[data['Admitted'].isin([0])]
data['Admitted'].isin([1]).head()
0 False
1 False
2 False
3 True
4 True
Name: Admitted, dtype: bool
positive.head()
| | Exam1 | Exam2 | Admitted |
|---|---|---|---|
3 | 60.182599 | 86.308552 | 1 |
4 | 79.032736 | 75.344376 | 1 |
6 | 61.106665 | 96.511426 | 1 |
7 | 75.024746 | 46.554014 | 1 |
8 | 76.098787 | 87.420570 | 1 |
sns.set(context='notebook',style='darkgrid')
sns.lmplot(x='Exam1', y='Exam2', hue='Admitted', data=data, fit_reg=False)
def sigmoid(z):
    """Logistic function: maps any real z into (0, 1)."""
    return 1 / (1 + np.exp(-z))

def get_X(df):
    """Prepend an intercept column of ones and return the feature matrix."""
    df = df.copy()                 # don't mutate the caller's DataFrame
    df.insert(0, 'Ones', 1)
    return df.iloc[:, :-1].values  # .as_matrix() is deprecated; use .values

def get_y(df):
    """The last column is the label vector."""
    return np.array(df.iloc[:, -1])

def normalize_feature(df):
    """Standardize each column to zero mean and unit variance."""
    return (df - df.mean()) / df.std()
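A quick sanity check on `sigmoid` (a small sketch, not part of the original exercise):

assert sigmoid(0) == 0.5  # exactly the decision threshold
assert np.isclose(sigmoid(35), 1.0) and np.isclose(sigmoid(-35), 0.0)  # tails saturate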
theta=np.zeros(3)
Logistic regression cost function
$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log\left(h_\theta(x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$
def cost(theta, X, y):
    # @ is matrix multiplication; sigmoid(X @ theta) is h_theta for every example at once
    return np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1 - sigmoid(X @ theta)))
X=get_X(data)
y=get_y(data)
cost(theta,X,y)
0.6931471805599453
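With θ = 0 the hypothesis is h = 0.5 for every example, so every term of the cost is −log(1/2) and J(0) = log 2, which is exactly the value above:

print(np.log(2))  # 0.6931471805599453, the cost at theta = 0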
Gradient descent

- This is batch gradient descent: each step uses the whole training set.
- In vectorized form:

$$\nabla_\theta J(\theta) = \frac{1}{m} X^T \left( \mathrm{sigmoid}(X\theta) - y \right)$$

which evaluates every partial derivative at once:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$$
def gradient_descent(theta, x, y):
    """Gradient over one batch (here, the full training set)."""
    return (1 / len(x)) * ((sigmoid(x @ theta) - y).T @ x)
gradient_descent(theta, X, y)
array([ -0.1 , -12.00921659, -11.26284221])
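Before handing this gradient to an optimizer it is worth checking it numerically. A minimal central-difference check (the helper `numeric_grad` is illustrative, not part of the exercise code):

def numeric_grad(f, theta, eps=1e-6):
    # central-difference approximation of the gradient of f at theta
    grad = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (f(theta + step) - f(theta - step)) / (2 * eps)
    return grad

print(np.allclose(gradient_descent(theta, X, y),
                  numeric_grad(lambda t: cost(t, X, y), theta)))  # expect True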
import scipy.optimize as opt
res = opt.minimize(fun=cost, x0=theta, args=(X, y), method='Newton-CG', jac=gradient_descent)
print(res)
fun: 0.20349770628264552
jac: array([-5.89271913e-06, -4.23590443e-04, -5.16057328e-04])
message: 'Optimization terminated successfully.'
nfev: 71
nhev: 0
nit: 28
njev: 242
status: 0
success: True
x: array([-25.15571627, 0.20618678, 0.20142615])
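The fitted parameters are returned in `res.x`; evaluating the cost there reproduces `fun`:

print(cost(res.x, X, y))  # ~0.2035, matches res.fun above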
Checking predictions on the training set
def predict(x, theta):
    h = sigmoid(x @ theta)
    return (h >= 0.5).astype(int)  # boolean mask cast to 0/1 class labels
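Applied to the optimizer's result, this gives the fraction of training examples classified correctly (a quick usage sketch):

y_pred = predict(X, res.x)
print('train accuracy: {:.2%}'.format((y_pred == y).mean()))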
Finding the decision boundary

The decision boundary is the set of points where $X\theta = 0$. Since

$$h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}},$$

$X\theta = 0$ is exactly where $h_\theta(x) = 1/2$, i.e. where the model is indifferent between the two classes. With two features the boundary is the line

$$\theta_0 + \theta_1 X_1 + \theta_2 X_2 = 0 \quad\Longrightarrow\quad X_2 = -\frac{\theta_0 + \theta_1 X_1}{\theta_2}$$

(note the minus sign, which the code below applies by negating `res.x`).
coef = -(res.x / res.x[2])  # rearranged boundary: X2 = coef[0] + coef[1] * X1
x = np.arange(130, step=0.1)
y = coef[0] + coef[1] * x
sns.set(context='notebook',style='darkgrid')
sns.lmplot(x='Exam1', y='Exam2', hue='Admitted', data=data, fit_reg=False)
plt.plot(x, y, 'r')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()
Regularized logistic regression (to deal with overfitting)
data2=pd.read_csv('ex2data2.txt',names=['test1','test2','accepted'])
data2.head()
| | test1 | test2 | accepted |
|---|---|---|---|
0 | 0.051267 | 0.69956 | 1 |
1 | -0.092742 | 0.68494 | 1 |
2 | -0.213710 | 0.69225 | 1 |
3 | -0.375000 | 0.50219 | 1 |
4 | -0.513250 | 0.46564 | 1 |
sns.set(context='notebook',style='darkgrid')
sns.lmplot(x='test1', y='test2', hue='accepted', data=data2, fit_reg=False)
Feature mapping (expanding the feature set with polynomial terms)
def feature_mapping(x1, x2, power):
    """Create every polynomial term x1^(i-j) * x2^j with total degree i up to `power`."""
    data = pd.DataFrame()
    for i in range(power + 1):  # range is half-open, so i runs over 0..power
        for j in range(i + 1):
            data['F' + str(j) + str(i)] = np.power(x1, i - j) * np.power(x2, j)
    return data
x1=np.array(data2.test1)
x2=np.array(data2.test2)
data_3=feature_mapping(x1,x2,6)
data_3.head()
| | F00 | F01 | F11 | F02 | F12 | F22 | F03 | F13 | F23 | F33 | ... | F35 | F45 | F55 | F06 | F16 | F26 | F36 | F46 | F56 | F66 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 0.051267 | 0.69956 | 0.002628 | 0.035864 | 0.489384 | 0.000135 | 0.001839 | 0.025089 | 0.342354 | ... | 0.000900 | 0.012278 | 0.167542 | 1.815630e-08 | 2.477505e-07 | 0.000003 | 0.000046 | 0.000629 | 0.008589 | 0.117206 |
1 | 1.0 | -0.092742 | 0.68494 | 0.008601 | -0.063523 | 0.469143 | -0.000798 | 0.005891 | -0.043509 | 0.321335 | ... | 0.002764 | -0.020412 | 0.150752 | 6.362953e-07 | -4.699318e-06 | 0.000035 | -0.000256 | 0.001893 | -0.013981 | 0.103256 |
2 | 1.0 | -0.213710 | 0.69225 | 0.045672 | -0.147941 | 0.479210 | -0.009761 | 0.031616 | -0.102412 | 0.331733 | ... | 0.015151 | -0.049077 | 0.158970 | 9.526844e-05 | -3.085938e-04 | 0.001000 | -0.003238 | 0.010488 | -0.033973 | 0.110047 |
3 | 1.0 | -0.375000 | 0.50219 | 0.140625 | -0.188321 | 0.252195 | -0.052734 | 0.070620 | -0.094573 | 0.126650 | ... | 0.017810 | -0.023851 | 0.031940 | 2.780914e-03 | -3.724126e-03 | 0.004987 | -0.006679 | 0.008944 | -0.011978 | 0.016040 |
4 | 1.0 | -0.513250 | 0.46564 | 0.263426 | -0.238990 | 0.216821 | -0.135203 | 0.122661 | -0.111283 | 0.100960 | ... | 0.026596 | -0.024128 | 0.021890 | 1.827990e-02 | -1.658422e-02 | 0.015046 | -0.013650 | 0.012384 | -0.011235 | 0.010193 |
5 rows × 28 columns
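The 28 columns are exactly the monomials $x_1^{i-j} x_2^{j}$ with total degree at most 6: degree $i$ contributes $i+1$ terms, so $\sum_{i=0}^{6}(i+1) = 28$. A one-line check:

print(sum(i + 1 for i in range(6 + 1)))  # 28, matches data_3.shape[1]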
Regularized cost
$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log\left(h_\theta(x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2$$
theta=np.zeros(data_3.shape[1])
X=feature_mapping(x1,x2,power=6)
print(X.shape)
y=get_y(data2)
print(y.shape)
print(theta.shape)
(118, 28)
(118,)
(28,)
def regularized_cost(theta, x, y, l=1):
    theta_new = theta[1:]  # theta_0 is not penalized
    regularized_term = (l / (2 * len(x))) * np.sum(np.power(theta_new, 2))
    return cost(theta, x, y) + regularized_term
regularized_cost(theta,X,y,l=1)
0.6931471805599454
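At θ = 0 the penalty term vanishes, so the regularized cost must equal the plain cost, which makes a cheap correctness check:

print(np.isclose(regularized_cost(theta, X, y, l=1), cost(theta, X, y)))  # expect True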
Regularized gradient
$$\frac{\partial J(\theta)}{\partial \theta_j} = \left( \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)} \right) + \frac{\lambda}{m} \theta_j \quad \text{for } j \ge 1$$
def regularized_gradient(theta, X, y, l=1):
    theta_new = theta[1:]
    # gradient of the penalty: 0 for theta_0, (l/m) * theta_j for j >= 1
    regularized_term = np.concatenate([np.array([0]), (l / len(X)) * theta_new])
    return np.array(gradient_descent(theta, X, y) + regularized_term)
regularized_gradient(theta, X, y)
array([8.47457627e-03, 1.87880932e-02, 7.77711864e-05, 5.03446395e-02,
1.15013308e-02, 3.76648474e-02, 1.83559872e-02, 7.32393391e-03,
8.19244468e-03, 2.34764889e-02, 3.93486234e-02, 2.23923907e-03,
1.28600503e-02, 3.09593720e-03, 3.93028171e-02, 1.99707467e-02,
4.32983232e-03, 3.38643902e-03, 5.83822078e-03, 4.47629067e-03,
3.10079849e-02, 3.10312442e-02, 1.09740238e-03, 6.31570797e-03,
4.08503006e-04, 7.26504316e-03, 1.37646175e-03, 3.87936363e-02])
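At θ = 0 the penalty gradient is also zero, so this should coincide with the unregularized gradient; in particular the first component (for θ₀) is never penalized:

print(np.allclose(regularized_gradient(theta, X, y),
                  gradient_descent(theta, X, y)))  # expect True at theta = 0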
print('init cost={}'.format(regularized_cost(theta,X,y)))
res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y), method='Newton-CG', jac=regularized_gradient)
res
init cost=0.6931471805599454
fun: 0.5290027297129302
jac: array([ 4.52421907e-08, -4.22499842e-09, -1.08282314e-08, -2.83522806e-08,
1.82082428e-08, -5.66533954e-08, -4.95369310e-09, -2.16518576e-08,
5.33617573e-09, -7.14897884e-09, -2.90197498e-08, 2.87231263e-09,
-3.76049930e-09, 8.31922510e-10, -2.38721746e-08, -7.87126082e-09,
-7.91557401e-09, 6.58521091e-10, -3.91583210e-09, 2.11321802e-09,
-2.24182045e-09, 1.16699260e-09, 7.98161960e-10, -5.02280356e-09,
2.78008348e-09, 5.86976469e-10, 6.29971969e-09, -2.17798713e-09])
message: 'Optimization terminated successfully.'
nfev: 7
nhev: 0
nit: 6
njev: 68
status: 0
success: True
x: array([ 1.27274165, 0.62527222, 1.18108934, -2.01996318, -0.91742302,
-1.43166909, 0.12400628, -0.36553617, -0.35723928, -0.17513015,
-1.4581588 , -0.05098913, -0.61555469, -0.27470694, -1.19281753,
-0.24218865, -0.2060067 , -0.04473068, -0.2777846 , -0.29537814,
-0.45635668, -1.0432015 , 0.02777166, -0.29243155, 0.01556695,
-0.32737909, -0.14388666, -0.92465139])
final_theta = res.x
y_pred = predict(X, final_theta)
print(classification_report(y, y_pred))
precision recall f1-score support
0 0.90 0.75 0.82 60
1 0.78 0.91 0.84 58
accuracy 0.83 118
macro avg 0.84 0.83 0.83 118
weighted avg 0.84 0.83 0.83 118
Visualizing the decision boundary
def feature_mapped_logistic_regression(power, l):
    """Refit the regularized model for a given polynomial power and lambda l."""
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    x1 = np.array(df.test1)
    x2 = np.array(df.test2)
    y = get_y(df)
    X = feature_mapping(x1, x2, power)
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient)
    final_theta = res.x
    return final_theta

def find_decision_boundary(density, power, theta, threshold):
    """Scan a grid over the plane and keep the points where X @ theta is close to zero."""
    t1 = np.linspace(-1, 1.5, density)
    t2 = np.linspace(-1, 1.5, density)
    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    mapped_cord = feature_mapping(x_cord, y_cord, power)  # map the grid into feature space
    inner_product = mapped_cord.values @ theta.T          # theta is 1-D, so row/column is not distinguished
    decision = mapped_cord[np.abs(inner_product) < threshold]
    return decision['F01'], decision['F11']               # F01 = x1, F11 = x2
def draw_boundary(power, l):
    density = 1000
    threshold = 2 * 10**-3  # tolerance for treating |X @ theta| as zero on the grid
    final_theta = feature_mapped_logistic_regression(power, l)
    x, y = find_decision_boundary(density, power, final_theta, threshold)
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    sns.lmplot(x='test1', y='test2', hue='accepted', data=df, size=6, fit_reg=False)
    plt.scatter(x, y, c='r', s=10)  # overlay only the (x1, x2) boundary points
    plt.title('Decision boundary')
    plt.show()
draw_boundary(power=6, l=1)
draw_boundary(power=6, l=100)
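For contrast, λ = 0 removes the penalty entirely, and with degree-6 features the boundary wraps itself around individual training points (overfitting), while the λ = 100 run above over-penalizes and underfits. A quick experiment:

draw_boundary(power=6, l=0)  # no regularization: expect a contorted, overfit boundary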