吴恩达机器学习python作业之逻辑回归

abcd1233463457347

已于 2022-10-27 09:08:34 修改

阅读量224

点赞数

分类专栏：吴恩达机器学习作业文章标签：机器学习 python 逻辑回归

于 2022-10-27 09:01:21 首次发布

本文链接：https://blog.csdn.net/Ariya1234/article/details/127545107

版权

吴恩达机器学习作业专栏收录该内容

14 篇文章 0 订阅

订阅专栏

作业2.1 逻辑回归

参考链接：

(7条消息) 吴恩达|机器学习作业2.0Logistic 回归_学吧学吧终成学霸的博客-CSDN博客
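先贴上我对运行结果中出现的 RuntimeWarning 的理解：
RuntimeWarning: divide by zero encountered in log：在迭代过程中 h(x) 可能逼近 1（或 0），使 log(1-h(x))（或 log(h(x))）中的自变量变为 0，结果为 -inf。
numpy.matmul 函数返回两个数组的矩阵乘积（@ 运算符调用的就是它）。因此，原脚本第39行的矩阵乘法会出现无效值（invalid value），是因为 log(X) 中的 X 可能取值为 0：log(0) = -inf，再与 0 相乘就得到 nan。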

import numpy as np
from matplotlib import pyplot as plt
from scipy import optimize as op


# 1. Load the data
# The path is a raw string: it contains backslash sequences such as "\d" and
# "\e" that are invalid escapes in a normal string literal and trigger a
# SyntaxWarning/DeprecationWarning on current Python versions. The resulting
# path text is unchanged.
dt = np.loadtxt(r"E:\机器学习\吴恩达\data_sets\ex2data1.txt", delimiter=",")
# Original author's note: dt has shape (100, 3)
cols = dt.shape[1]  # number of columns in the file (features + label)
m = dt.shape[0]  # number of samples
x = dt[:, 0:2]  # first two columns are the features
y = dt[:, -1].astype(float)  # last column is the label, as a float copy

# 2. Inspect the data distribution by splitting the samples per label
# condition1: indices of the samples whose label is 1
# (the admitted students, according to the original comments)
condition1 = np.where(y == 1)
# condition2: indices of the samples whose label is 0 (not admitted)
condition2 = np.where(y == 0)
X1, X2 = x[condition1], x[condition2]

# Scatter plot: red circles for label 1, blue crosses for label 0
for points, colour, shape, name in (
    (X1, 'r', "o", "positive"),
    (X2, 'b', "x", "negtive"),
):
    plt.scatter(points[:, 0], points[:, 1], s=50, c=colour, marker=shape, label=name)


# 3. Cost function and gradient
# Sigmoid (logistic) function
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The value is evaluated as exp(-log(1 + exp(-x))) through
    ``np.logaddexp``. This is mathematically the same expression, but it
    never calls ``exp`` on a large positive number, so large negative inputs
    (e.g. x = -1000) no longer overflow and emit a RuntimeWarning.

    :param x: scalar or numpy array of real values
    :return: the sigmoid of ``x``, with the same shape as ``x``
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), computed without overflow
    return np.exp(-np.logaddexp(0, -x))

# Cost function
def costFunction(theta, x, y):
    """Return the mean cross-entropy cost of logistic regression.

    With z = x @ theta and h = sigmoid(z), the cost is
    -(1/n) * sum(y * log(h) + (1 - y) * log(1 - h)).

    The two logarithms are computed directly from z:
    log(h) = -logaddexp(0, -z) and log(1 - h) = -logaddexp(0, z).
    This avoids evaluating log(0) when h saturates to 0 or 1, which is what
    produced the "divide by zero encountered in log" / "invalid value"
    RuntimeWarnings during optimization.

    The sample count is taken from ``y`` rather than from a module-level
    variable, so the function works for any dataset passed in.

    :param theta: 1-D parameter vector, one entry per column of ``x``
    :param x: 2-D design matrix, one row per sample
    :param y: 1-D array of labels (0 or 1), one per row of ``x``
    :return: the cost as a scalar
    """
    y = np.asarray(y)
    z = x @ theta  # linear score for every sample
    log_h = -np.logaddexp(0, -z)  # log(sigmoid(z))
    log_one_minus_h = -np.logaddexp(0, z)  # log(1 - sigmoid(z))
    first = y @ log_h  # scalar
    second = (1 - y) @ log_one_minus_h  # scalar
    return -(first + second) / y.shape[0]

# Gradient of the cost function (before regularization)
# Original author's note: using this gradient function in practice was
# reported to raise an error; the cause is not stated.
def gradient(theta, x, y):
    """Return the gradient of the unregularized logistic-regression cost.

    The sample count is read from ``x`` itself rather than from a
    module-level variable, so the result is correct for any dataset passed
    in, not only the one loaded at the top of the script.

    :param theta: 1-D parameter vector being optimized
    :param x: 2-D design matrix (features), one row per sample
    :param y: 1-D array of labels (0 or 1), one per row of ``x``
    :return: 1-D array with the partial derivative of the cost with respect
        to each component of ``theta``
    """
    h = sigmoid(x @ theta)  # predicted probabilities
    # Average over the samples so the gradient does not grow with the
    # size of the dataset.
    return (x.T @ (h - y)) / x.shape[0]

# 4. Reshape the inputs for the optimizers:
# prepend a column of ones (intercept term) to x and start theta at zero
a = np.ones((m, 1))
x = np.hstack((a, x))
theta = np.zeros(cols)

"""
scipy.optimize的fmin_tnc函数
func：优化的目标函数
x0：初值
fprime：提供优化函数func的梯度函数，不然优化函数func必须返回函数值和梯度，或者设置approx_grad=True
approx_grad :如果设置为True，会给出近似梯度
args：元组，是传递给优化函数的参数
"""
# result = op.fmin_tnc(func = costFunction , x0 = theta ,fprime = gradient,args=(x,y))
# print(result)
# #(array([-25.16131864,   0.20623159,   0.20147149]), 36, 0)
# result1 = op.fmin_tnc(func = costFunction , x0 = theta ,approx_grad=True,args=(x,y))
# print(result1)
# #(array([-24.97541176,   0.20475316,   0.19995585]), 224, 1)

# Keyword arguments shared by all three optimizers
shared_opts = dict(fprime=gradient, x0=theta, args=(x, y), maxiter=400)

# Method 1: BFGS
bfgs_result = op.fmin_bfgs(f=costFunction, full_output=True, **shared_opts)
theta1, cost1, *unused1 = bfgs_result
# Result recorded by the original author:
# array([-25.16133284,   0.2062317 ,   0.2014716 ])

# Method 2: Newton conjugate gradient
ncg_result = op.fmin_ncg(f=costFunction, full_output=True, **shared_opts)
theta2, cost2, *unused2 = ncg_result
# Result recorded by the original author:
# array([-25.16463945,   0.20625816,   0.20149835])

# Method 3: L-BFGS-B
lbfgsb_result = op.fmin_l_bfgs_b(func=costFunction, **shared_opts)
theta3, cost3, *unused3 = lbfgsb_result
# Result recorded by the original author:
# array([-25.16141038,   0.20623233,   0.20147212])

#5.画出决策边界
'''画出决策边界'''
#x@theta=0为决策边界，即theta[0]*1+theta[1]*x[1]+theta[2]*x[2]=0
def plotBD(theta, method, color):
    """Draw the line x @ theta = 0 for one parameter vector on the current axes.

    The line satisfies theta[0] + theta[1] * x1 + theta[2] * x2 = 0, so x2 is
    solved for over a fixed grid of x1 values from 25 up to (not including)
    100 in steps of 0.1.

    :param theta: sequence of three coefficients (intercept, x1 weight, x2 weight)
    :param method: legend label for this line
    :param color: matplotlib color for this line
    """
    intercept, w1, w2 = theta[0], theta[1], theta[2]
    horizontal = np.arange(25, 100, step=0.1)
    vertical = -(intercept + w1 * horizontal) / w2
    plt.plot(horizontal, vertical, label=method, c=color)
    plt.legend(loc=3)
    plt.title('The Decision Boundary')

# Draw the boundary found by each optimizer on figure 1, then display it
plt.figure(1)
for fitted_theta, method_name, line_color in (
    (theta1, 'BFGS', 'g'),
    (theta2, 'NCG', 'r'),
    (theta3, 'L-BFGS-B', 'b'),
):
    plotBD(fitted_theta, method_name, line_color)
plt.show()