All of this code was written by myself directly from the theory; it took real effort, so your support is appreciated.
Theory part of logistic regression: https://blog.csdn.net/jk_chen_acmer/article/details/103008521
Logistic regression: gradient descent algorithm
Single feature, binary label (0 or 1): $y=0$ when $x<5$ and $y=1$ when $x\ge 5$, with $x$ then jittered by up to 1 unit in either direction.
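For reference, these are the formulas the code below implements (see the theory post linked above): the sigmoid hypothesis, the cross-entropy cost, and the batch gradient step; note the code folds the $\frac{1}{m}$ factor of the gradient into the learning rate $\alpha$.

$$h_\theta(x)=\frac{1}{1+e^{-\theta^T x}}$$
$$J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)})+(1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right]$$
$$\theta:=\theta-\alpha\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x^{(i)}$$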
import numpy as np
import matplotlib.pyplot as plt
import copy

# Generate random data
# X: m*n, first column fixed to 1 (bias term)
# Y: m*1
# y = 0 when x < 5 and y = 1 when x >= 5; x is then jittered by up to 1 unit
def getData():
    m = 100
    n = 2  # a single feature, so 1 + 1 (bias column + feature column)
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        if X[i, 1] >= 5:
            Y[i, 0] = 1
        else:
            Y[i, 0] = 0
        X[i, 1] += 1 - np.random.rand(1)[0] * 2  # jitter in [-1, 1]
    return [X, Y]
# Hypothesis value (sigmoid); Th: 1*n, X: 1*n (n already includes the bias column)
def getH(Th, X):
    return 1 / (1 + np.exp(-(Th * X.T)[0, 0]))

# One sample's log-likelihood term of the cost
def getCost(H, y):
    return y * np.log(H) + (1 - y) * np.log(1 - H)
# Gradient descent
# Data set: X: m*n, Y: m*1
def gradientDescent(X, Y):
    m = X.shape[0]
    n = X.shape[1]
    rate = 3e-3  # learning rate
    Th = np.mat(np.ones([1, n], float))  # coefficient vector theta, length n
    preJ = 0  # cost J computed from theta and x
    for i in range(0, m):
        preJ += getCost(getH(Th, X[i]), Y[i, 0])
    preJ /= -m
    while 1:
        Old = copy.copy(Th)
        for i in range(0, m):
            Th -= rate * (getH(Old, X[i]) - Y[i, 0]) * X[i]  # gradient step
        J = 0  # recompute the cost with the updated theta
        for i in range(0, m):
            J += getCost(getH(Th, X[i]), Y[i, 0])
        J /= -m
        if abs(J - preJ) < 1e-5:  # converged: the cost barely changed
            break
        print(J)
        preJ = J
    return Th  # the fitted parameter vector
# Plot the data points and the fitted hypothesis curve
def draw(X, Y, Th=None):
    m = X.shape[0]
    n = X.shape[1]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set(xlim=[0, 10], ylim=[-0.1, 1.1],
           title='Data : y=[ x>=5 ]', ylabel='y', xlabel='x')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x-axis at y=0
    for i in range(0, m):
        ax.plot(X[i, 1], Y[i, 0], 'o')
    if Th is not None:
        x = np.linspace(0, 10, 100)
        y = []
        for i in range(0, 100):
            y.append(getH(Th, np.mat([1, x[i]])))  # full feature vector [1, x]
        ax.plot(x, y)
        _5 = 0.5 + 0 * x  # the 0.5 decision threshold
        ax.plot(x, _5)
    plt.show()
if __name__ == '__main__':
    data = getData()
    ans = gradientDescent(data[0], data[1])
    print(ans)
    draw(data[0], data[1], ans)
Run result:
A few points are inevitably off, because after the jitter the data simply cannot be separated perfectly by a single feature.
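To quantify "a few points", here is a minimal sketch (not in the original post) that counts how many training points the fitted model gets wrong; it assumes the data and ans variables from the __main__ block above:

# Count misclassified training points; data/ans come from __main__ above.
X, Y = data
# a point is misclassified when the thresholded hypothesis disagrees with its label
wrong = sum(1 for i in range(X.shape[0])
            if (getH(ans, X[i]) >= 0.5) != (Y[i, 0] == 1))
print('misclassified:', wrong, 'of', X.shape[0])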
Logistic regression: two features
Points inside $x_1^2+(x_2-5)^2\le 5^2$ are labeled 1, all others 0.
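A note on the feature construction: since $x_1\in[0,6]$ and $x_2\in[0,10]$, the squared terms are divided by their maxima ($6^2=36$ and $10^2=100$), presumably so that every feature stays roughly within $[0,1]$ and the gradient steps remain balanced across coordinates. The feature vector for each sample is
$$\phi(x)=\left[\,1,\ x_1,\ \tfrac{x_1^2}{36},\ x_2,\ \tfrac{x_2^2}{100}\,\right].$$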
import numpy as np
import matplotlib.pyplot as plt
import copy

# Generate random data
# X: m*n, first column fixed to 1 (bias term)
# Y: m*1
# 1: x1^2+(x2-5)^2 <= 5^2
# 0: others
# -> features: 1, x1, x1^2/36, x2, x2^2/100
def getData():
    m = 300
    n = 5  # bias column + 4 engineered feature columns
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    X[1] = np.random.random(m) * 6
    X[2] = np.multiply(X[1], X[1])
    X[3] = np.random.random(m) * 10
    X[4] = np.multiply(X[3], X[3])
    X[2] /= 36   # scale x1^2 into [0, 1]
    X[4] /= 100  # scale x2^2 into [0, 1]
    X = X.T
    for i in range(0, m):
        if X[i, 1]**2 + (X[i, 3] - 5)**2 <= 5**2:
            Y[i, 0] = 1
        else:
            Y[i, 0] = 0
    return [X, Y]
# Hypothesis value (sigmoid); Th: 1*n, X: 1*n (n already includes the bias column)
def getH(Th, X):
    return 1 / (1 + np.exp(-(Th * X.T)[0, 0]))

# One sample's log-likelihood term of the cost
def getCost(H, y):
    return y * np.log(H) + (1 - y) * np.log(1 - H)
# Gradient descent
# Data set: X: m*n, Y: m*1
def gradientDescent(X, Y):
    m = X.shape[0]
    n = X.shape[1]
    rate = 1e-3  # learning rate
    Th = np.mat(np.ones([1, n], float))  # coefficient vector theta, length n
    preJ = 0  # cost J computed from theta and x
    for i in range(0, m):
        preJ += getCost(getH(Th, X[i]), Y[i, 0])
    preJ /= -m
    while 1:
        Old = copy.copy(Th)
        for i in range(0, m):
            Th -= rate * (getH(Old, X[i]) - Y[i, 0]) * X[i]  # gradient step
        J = 0  # recompute the cost with the updated theta
        for i in range(0, m):
            J += getCost(getH(Th, X[i]), Y[i, 0])
        J /= -m
        if abs(J - preJ) < 1e-5:  # converged: the cost barely changed
            break
        print(J)
        preJ = J
    return Th  # the fitted parameter vector
# Plot the original labels (left) and the model's predictions (right)
def draw(X, Y, Th=None):
    m = X.shape[0]
    n = X.shape[1]
    fig = plt.figure()
    ax = fig.add_subplot(1, 2, 1)
    ax.set(xlim=[0, 7], ylim=[0, 11],
           title='Data', ylabel='X2', xlabel='X1')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x-axis at y=0
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.set(xlim=[0, 7], ylim=[0, 11],
            title='Prediction', ylabel='X2', xlabel='X1')
    ax2.xaxis.set_ticks_position('bottom')
    ax2.spines['bottom'].set_position(('data', 0))  # put the x-axis at y=0
    for i in range(0, m):
        if Y[i, 0] == 0:
            ax.plot(X[i, 1], X[i, 3], 'o', color="black")
        else:
            ax.plot(X[i, 1], X[i, 3], 'o', color="red")
    for i in range(0, m):
        H = getH(Th, X[i])
        if H < 0.5:
            ax2.plot(X[i, 1], X[i, 3], 'o', color="black")
        else:
            ax2.plot(X[i, 1], X[i, 3], 'o', color="red")
    plt.show()
if __name__ == '__main__':
    data = getData()
    ans = gradientDescent(data[0], data[1])
    print(ans)
    draw(data[0], data[1], ans)
Run result:
The fitted parameters are [[ 3.93492015 -1.19671356 -3.61222607 1.4514791 -14.27452483]]; that is, the predicted decision boundary is $3.9349-1.1967X_1-3.6122X_1^2/36+1.4515X_2-14.2745X_2^2/100=0$.
I plotted the original data together with the final prediction for each point (right panel).
Problem analysis:
I plugged 4 test points into the fitted boundary function; the first three lie on the true circle $x_1^2+(x_2-5)^2=5^2$ (the fourth, $(-5,-5)$, actually lies well outside it):
if __name__ == '__main__':
    # evaluate the fitted boundary at the four test points
    for X_1, X_2 in [(5, 5), (0, 10), (0, 0), (-5, -5)]:
        print(3.9349 - 1.1967*X_1 - 3.6122/36*X_1**2
              + 1.4515*X_2 - 14.2745/100*X_2**2)
The output:
-0.8681972222222227
4.175399999999998
3.9349
-3.4161972222222228
Comparing the data with the resulting plot, the decision is quite accurate on the right side of the circle, but the boundary error is large along the top and bottom.
The likely cause is that those regions contain few contrasting samples; the data there is too sparse for the algorithm to pin down the correct boundary. Supplying more contrasting data above and below the circle should improve things.
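To make the error concrete, completing the square on the fitted boundary (my own arithmetic on the printed parameters, rounded) gives
$$0.1003\,(X_1+5.96)^2+0.1427\,(X_2-5.08)^2\approx 11.19,$$
an ellipse centered near $(-5.96,\,5.08)$. At $X_1=0$ it crosses $X_2\approx-2.2$ and $X_2\approx 12.4$ instead of the true $0$ and $10$, while at $X_2\approx 5$ its right edge sits at $X_1\approx 4.6$, close to the true $5$; this matches what the plot shows.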
Using 1000 data points (sampled the same way) does not improve this:
But 300 data points that do include contrast above and below the circle solve the problem nicely:
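For reference, here is a minimal sketch of one way to generate that contrast (my assumption; the post does not show the exact change it used): widen the $x_2$ sampling range so that points outside the circle also appear above and below it, not only to its right.

import numpy as np

def getDataContrast(m=300):
    # same feature layout as getData above: 1, x1, x1^2/36, x2, x2^2/100
    X = np.mat(np.ones([m, 5], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    X[1] = np.random.random(m) * 6       # x1 in [0, 6], as before
    X[2] = np.multiply(X[1], X[1]) / 36
    X[3] = np.random.random(m) * 14 - 2  # x2 in [-2, 12]: the assumed widened range
    X[4] = np.multiply(X[3], X[3]) / 100
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 1 if X[i, 1]**2 + (X[i, 3] - 5)**2 <= 5**2 else 0
    return [X, Y]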