逻辑回归源码及实战案例

最新推荐文章于 2024-05-28 11:16:03 发布

一勇之夫

最新推荐文章于 2024-05-28 11:16:03 发布

阅读量1.2k

点赞数

分类专栏：数据处理 AI 文章标签：机器学习人工智能逻辑回归

本文链接：https://blog.csdn.net/yiyongzhifu/article/details/89501949

版权

数据处理同时被 2 个专栏收录

15 篇文章 2 订阅

订阅专栏

13 篇文章 0 订阅

订阅专栏

逻辑回归分析

相关视频地址 https://www.bilibili.com/video/av50360945/

数据地址 https://download.csdn.net/download/yiyongzhifu/11142350

# %load ../../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.optimize import minimize

from sklearn.preprocessing import PolynomialFeatures

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)
 
#%config InlineBackend.figure_formats = {'pdf',}
%matplotlib inline

import seaborn as sns
sns.set_context('notebook')
sns.set_style('white')

def loaddata(file, delimeter):
    """Load a delimited numeric text file into a NumPy array.

    As a quick sanity check the array's shape is printed, followed by
    rows 1 through 5 (the slice ``[1:6]``, so row 0 is not shown).

    Args:
        file: File name or file object accepted by ``np.loadtxt``.
        delimeter: Column separator, forwarded to ``np.loadtxt`` as
            ``delimiter``.

    Returns:
        The array returned by ``np.loadtxt``.
    """
    table = np.loadtxt(file, delimiter=delimeter)
    print('Dimensions: ', table.shape)
    preview = table[1:6, :]
    print(preview)
    return table

def plotData(data, label_x, label_y, label_pos, label_neg, axes=None):
    """Scatter-plot a two-feature, binary-labelled dataset.

    Args:
        data: 2-D array. Columns 0 and 1 are used as the x and y
            coordinates; column 2 holds the class label (1 = positive,
            0 = negative). Rows with any other label are not drawn.
        label_x: Text for the x-axis label.
        label_y: Text for the y-axis label.
        label_pos: Legend entry for the positive samples.
        label_neg: Legend entry for the negative samples.
        axes: Axes object to draw on. When omitted, the current axes
            returned by ``plt.gca()`` are used.
    """
    # Boolean row masks selecting the negative and the positive samples.
    neg = data[:, 2] == 0
    pos = data[:, 2] == 1

    # Identity test: `== None` would dispatch to the argument's __eq__,
    # which is ambiguous or an error for array-like objects.
    if axes is None:
        axes = plt.gca()
    axes.scatter(data[pos][:, 0], data[pos][:, 1], marker='+', c='k', s=60, linewidth=2, label=label_pos)
    axes.scatter(data[neg][:, 0], data[neg][:, 1], c='y', s=60, label=label_neg)
    axes.set_xlabel(label_x)
    axes.set_ylabel(label_y)
    axes.legend(frameon=True, fancybox=True)


# Load the exam-score dataset: two feature columns followed by a 0/1 label.
data = loaddata('data1.txt', ',')
# Design matrix: a leading column of ones (intercept term) plus both features.
X = np.hstack((np.ones((data.shape[0], 1)), data[:, :2]))
# Labels as an (m, 1) column vector (list indexing returns a copy).
y = data[:, [2]]
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Pass', 'Fail')

# Logistic (sigmoid) function
def sigmoid(z):
    """Return 1 / (1 + exp(-z)), evaluated element-wise for array input."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Cost (mean negative log-likelihood) function
def costFunction(theta, X, y):
    """Return the mean cross-entropy cost of logistic regression.

    Args:
        theta: Parameter vector; this file passes a 1-D array with one
            entry per column of ``X``.
        X: Design matrix (one row per sample).
        y: 0/1 labels; this file passes an (m, 1) column vector.

    Returns:
        The first element of the computed cost array, or ``np.inf`` when
        that element is NaN (for example because the inputs contain NaN).
    """
    m = y.size
    h = sigmoid(X.dot(theta))

    # Keep h strictly inside (0, 1). A saturated sigmoid returns exactly
    # 0 or 1, and then 0 * log(0) evaluates to NaN, which would report an
    # infinite cost even for a perfectly classified sample.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)

    J = -1*(1/m)*(np.log(h).T.dot(y)+np.log(1-h).T.dot(1-y))

    if np.isnan(J[0]):
        return(np.inf)
    return(J[0])


# Gradient of the cost with respect to theta
def gradient(theta, X, y):
    """Return the gradient of the logistic-regression cost as a flat array.

    ``theta`` is reshaped to a column so that the prediction lines up with
    ``y`` when the residual is formed; the result is flattened back to 1-D.
    """
    n_samples = y.size
    theta_col = theta.reshape(-1, 1)
    residual = sigmoid(np.dot(X, theta_col)) - y

    scaled = (1/n_samples) * np.dot(X.T, residual)
    return scaled.flatten()


# Start from the all-zeros parameter vector (one entry per column of X).
initial_theta = np.zeros(X.shape[1])
# Evaluate the cost and its gradient at that starting point and print them.
cost = costFunction(initial_theta, X, y)
grad = gradient(initial_theta, X, y)
print('Cost: \n', cost)
print('Grad: \n', grad)

Cost: 
 0.69314718056
Grad: 
 [ -0.1        -12.00921659 -11.26284221]

最小化损失函数（这里直接调用 scipy.optimize.minimize，而不是手写梯度下降）

# Shortcut: instead of hand-writing an optimiser, minimise the cost with
# scipy.optimize.minimize, supplying the analytic gradient through `jac`.
# NOTE(review): method=None lets SciPy pick the solver; with no bounds or
# constraints its documentation gives BFGS as the default -- confirm.
res = minimize(costFunction, initial_theta, args=(X,y), method=None, jac=gradient, options={'maxiter':400})
# Bare expression: shown as the cell output in a notebook, no effect in a script.
res

   status: 0
      jac: array([ -1.03340955e-08,  -1.48646939e-06,   2.79249972e-07])
  message: 'Optimization terminated successfully.'
      fun: 0.20349770158946398
  success: True
        x: array([-25.16133593,   0.20623171,   0.20147164])
     njev: 29
     nfev: 29
      nit: 25
 hess_inv: array([[  3.42070059e+03,  -2.74610638e+01,  -2.75463366e+01],
       [ -2.74610638e+01,   2.34843943e-01,   2.08263560e-01],
       [ -2.75463366e+01,   2.08263560e-01,   2.37408886e-01]])

预测部分

def predict(theta, X, threshold=0.5):
    """Classify each row of ``X`` as 1 or 0.

    A row is labelled 1 when its sigmoid output is greater than or equal
    to ``threshold``; the boolean result is returned cast to ``int``.
    """
    probabilities = sigmoid(X.dot(theta.T))
    is_positive = probabilities >= threshold
    return is_positive.astype('int')


# A student who scored 45 on the first exam and 85 on the second:
# evaluate the model's estimated probability of passing (the leading 1 is
# the intercept term).
sigmoid(np.array([1, 45, 85]).dot(res.x.T))

# Training accuracy: percentage of rows whose prediction equals the label.
p = predict(res.x, X)
print('Train accuracy {}%'.format(100 * (p == y.ravel()).sum() / p.size))

Train accuracy 89.0%

# Mark the example student (45, 85) and draw the training data around it.
plt.scatter(45, 85, s=60, c='r', marker='v', label='(45, 85)')
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Pass', 'Failed')

# Evaluate the fitted model on a grid spanning the observed range of both
# features (columns 1 and 2 of X; column 0 is the intercept).
x1_min, x2_min = X[:, 1].min(), X[:, 2].min()
x1_max, x2_max = X[:, 1].max(), X[:, 2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
h = sigmoid(
    np.column_stack((np.ones(xx1.size), xx1.ravel(), xx2.ravel())).dot(res.x)
).reshape(xx1.shape)

# The 0.5 probability level set is the decision boundary.
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b');

做一下特征映射，生成多项式特征，最高的次数为6

# Load the second dataset; every later step of the regularised example
# reads `data2`, which was never defined in the original listing.
# NOTE(review): the file name is assumed by analogy with 'data1.txt' -- confirm.
data2 = loaddata('data2.txt', ',')
# Rebind the feature matrix and label column so the regularised cost,
# gradient and plotting code below operate on this dataset.
X = data2[:, 0:2]
y = np.c_[data2[:, 2]]

# Expand the two raw features into all polynomial terms up to degree 6
# (PolynomialFeatures also emits the constant bias column by default).
poly = PolynomialFeatures(6)
XX = poly.fit_transform(data2[:,0:2])
XX.shape

(118, 28)

带正则化项的损失函数

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_\theta(x^{(i)})\right) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

向量化的损失函数

$$J(\theta) = -\frac{1}{m}\left(\log\left(g(X\theta)\right)^{T} y + \log\left(1-g(X\theta)\right)^{T}(1-y)\right) + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

def costFunctionReg(theta, reg, *args):
    """Return the L2-regularised logistic-regression cost.

    Args:
        theta: Parameter vector (1-D). ``theta[0]`` is excluded from the
            penalty term.
        reg: Regularisation strength (lambda).
        *args: Optional ``(features, labels)`` pair. When fewer than two
            extra arguments are supplied, the module-level ``XX`` and ``y``
            are used instead, which is what the function always did before.

    Returns:
        The first element of the computed cost array, or ``np.inf`` when
        that element is NaN.
    """
    # Honour the data passed by the caller (e.g. via minimize's `args`)
    # rather than silently reading globals.
    if len(args) >= 2:
        features, labels = args[0], args[1]
    else:
        features, labels = XX, y

    m = labels.size
    h = sigmoid(features.dot(theta))

    # Keep h strictly inside (0, 1) so that a saturated sigmoid does not
    # produce 0 * log(0) = NaN.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)

    J = -1*(1/m)*(np.log(h).T.dot(labels)+np.log(1-h).T.dot(1-labels)) + (reg/(2*m))*np.sum(np.square(theta[1:]))

    if np.isnan(J[0]):
        return(np.inf)
    return(J[0])

还是偏导(梯度)

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j$$

向量化

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}X^{T}\left(g(X\theta) - y\right) + \frac{\lambda}{m}\theta_j$$

Note: 要注意的是参数 $\theta_0$ 是不需要正则化的

def gradientReg(theta, reg, *args):
    """Return the gradient of the L2-regularised cost as a flat array.

    Args:
        theta: Parameter vector (1-D). The penalty gradient for
            ``theta[0]`` is zero, i.e. that parameter is not regularised.
        reg: Regularisation strength (lambda).
        *args: Optional ``(features, labels)`` pair. When fewer than two
            extra arguments are supplied, the module-level ``XX`` and ``y``
            are used instead, which is what the function always did before.

    Returns:
        1-D array with one entry per element of ``theta``.
    """
    # Honour the data passed by the caller (e.g. via minimize's `args`)
    # rather than silently reading globals.
    if len(args) >= 2:
        features, labels = args[0], args[1]
    else:
        features, labels = XX, y

    m = labels.size
    h = sigmoid(features.dot(theta.reshape(-1,1)))

    # Penalty term as a column vector with a leading 0 in place of theta[0].
    penalty = (reg/m)*np.r_[[[0]],theta[1:].reshape(-1,1)]
    grad = (1/m)*features.T.dot(h-labels) + penalty

    return(grad.flatten())

# Zero starting parameters, one per column of the polynomial feature matrix XX.
initial_theta = np.zeros(XX.shape[1])
# Regularised cost at the starting point with regularisation strength 1
# (bare expression: shown as the cell output in a notebook).
costFunctionReg(initial_theta, 1, XX, y)

0.69314718055994529

fig, axes = plt.subplots(1,3, sharey = True, figsize=(17,5))

# Decision boundaries for three regularisation strengths, to show what
# happens when lambda is too small or too large:
# Lambda = 0   : no regularisation at all, so the model overfits
# Lambda = 1   : a sensible amount of regularisation
# Lambda = 100 : the penalty dominates and the boundary barely fits the data

for i, C in enumerate([0, 1, 100]):
    # Minimise the regularised cost for this lambda (C).
    res2 = minimize(costFunctionReg, initial_theta, args=(C, XX, y), method=None, jac=gradientReg, options={'maxiter':3000})

    # Training accuracy in percent.
    accuracy = 100*sum(predict(res2.x, XX) == y.ravel())/y.size

    # Scatter plot of the samples on this subplot.
    plotData(data2, 'Microchip Test 1', 'Microchip Test 2', 'y = 1', 'y = 0', axes.flatten()[i])

    # Decision boundary. The grid must span the two raw features of data2;
    # the earlier `X` built from the first dataset has a ones column at
    # index 0 and would give a degenerate, wrongly scaled grid.
    x1_min, x1_max = data2[:,0].min(), data2[:,0].max()
    x2_min, x2_max = data2[:,1].min(), data2[:,1].max()
    xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
    # `poly` is already fitted on the training features: only transform here.
    h = sigmoid(poly.transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(res2.x))
    h = h.reshape(xx1.shape)
    axes.flatten()[i].contour(xx1, xx2, h, [0.5], linewidths=1, colors='g');
    axes.flatten()[i].set_title('Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=2), C))

一勇之夫

关注

0
点赞
踩
13

收藏

觉得还不错? 一键收藏
0
评论
逻辑回归源码及实战案例

逻辑回归分析相关视频地址https://www.bilibili.com/video/av50360945/数据地址 https://download.csdn.net/download/yiyongzhifu/11142350# %load ../../standard_import.txtimport pandas as pdimport numpy as npimp...
复制链接

扫一扫