机器学习中逻辑回归的Python实现——解决分类中的线性可分以及线性不可分问题

最新推荐文章于 2024-05-03 11:58:37 发布

秃头鸭鸭鸭

最新推荐文章于 2024-05-03 11:58:37 发布

阅读量181

点赞数

分类专栏：机器学习的数学
文章标签：机器学习 python 逻辑回归

本文链接：https://blog.csdn.net/a3216828/article/details/130042162

版权

机器学习的数学专栏收录该内容

6 篇文章 0 订阅

订阅专栏

线性可分

import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
# Columns 0-1 are used as the input features, column 2 as the label.
train_x, train_y = train[:, :2], train[:, 2]

# Initialize the three parameters with random values.
theta = np.random.rand(3)

# Statistics used for standardization.
# axis=0 collapses the rows, yielding one mean / standard deviation per column
# (axis=1 would instead yield one value per row).
mu = np.mean(train_x, axis=0)
sigma = np.std(train_x, axis=0)


def standardize(x):
    """Return ``x`` centered on the module-level ``mu`` and scaled by ``sigma``."""
    centered = x - mu
    return centered / sigma


train_z = standardize(train_x)

# Build the training data matrix: a leading column of ones followed by the
# standardized features.
n_samples = train_z.shape[0]
X = np.hstack((np.ones((n_samples, 1)), train_z))


# Sigmoid function
def f(X):
    """Return the sigmoid of ``X · theta`` using the module-level ``theta``."""
    z = np.dot(X, theta)
    return 1 / (1 + np.exp(-z))

# Classification function
def classify(x):
    """Return integer class labels for ``x``: 1 where ``f(x) >= 0.5``, else 0.

    ``x`` is passed straight to ``f``, so it must have the layout ``f``
    expects (the same columns as the training matrix).
    """
    # The ``np.int`` alias was removed in NumPy 1.24; it was only ever an
    # alias for the builtin ``int``, so this yields the same dtype as before.
    return (f(x) >= 0.5).astype(int)

# Number of training iterations
epoch = 5000

# Learning rate
ETA = 1e-3

# Running update counter (1-based), used only in the progress output
count = 1

# Training loop: each pass moves theta against (f(X) - train_y) · X, the
# gradient of the logistic-regression negative log-likelihood.
for _ in range(epoch):
    residual = f(X) - train_y
    theta = theta - ETA * residual.dot(X)
    print('第{}次，theta={}'.format(count, theta))
    count += 1

# Plot the standardized samples and the learned decision boundary.
is_positive = train_y == 1
is_negative = train_y == 0
x0 = np.linspace(-2, 2, 100)
# Solve theta[0] + theta[1] * x0 + theta[2] * y = 0 for y.
boundary = -(theta[0] + theta[1] * x0) / theta[2]
plt.plot(train_z[is_positive, 0], train_z[is_positive, 1], 'o')
plt.plot(train_z[is_negative, 0], train_z[is_negative, 1], 'x')
plt.plot(x0, boundary, linestyle='dashed')
plt.axis('scaled')
plt.show()

线性不可分

import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
# Columns 0-1 are used as the input features, column 2 as the label.
train_x, train_y = train[:, :2], train[:, 2]

# Column-wise mean and standard deviation used for standardization.
mu = np.mean(train_x, axis=0)
sigma = np.std(train_x, axis=0)


def standardize(x):
    """Return ``x`` centered on the module-level ``mu`` and scaled by ``sigma``."""
    centered = x - mu
    return centered / sigma


# Standardize the training features with the column-wise mu and sigma.
train_z = standardize(train_x)


# Build the training data matrix
def to_matrix(x):
    """Return ``x`` with a leading column of ones and a trailing column holding
    the square of its first column.

    ``x`` is indexed as a 2-D array (rows are samples); the result has two
    more columns than ``x``.
    """
    ones_column = np.ones([x.shape[0], 1])
    first_feature = x[:, 0]
    # np.newaxis inserts a new axis; its position decides the resulting shape:
    # placed last (v[:, np.newaxis]) it turns a 1-D vector into a column,
    # placed first (v[np.newaxis, :]) it turns it into a row.
    squared_column = first_feature[:, np.newaxis] ** 2
    return np.hstack([ones_column, x, squared_column])


# Expand the standardized features into the training matrix
# (ones column, the two features, and the squared first feature).
X = to_matrix(train_z)

# Initialize the parameters with random values, one per column of X
theta = np.random.rand(4)


# Sigmoid function
def f(X):
    """Return the sigmoid of ``X · theta`` using the module-level ``theta``."""
    z = np.dot(X, theta)
    return 1 / (1 + np.exp(-z))


# Classification function
def classify(X):
    """Return integer class labels for ``X``: 1 where ``f(X) >= 0.5``, else 0.

    ``X`` is passed straight to ``f``, so it must have the layout ``f``
    expects (the same columns as the training matrix).
    """
    # The ``np.int`` alias was removed in NumPy 1.24; it was only ever an
    # alias for the builtin ``int``, so this yields the same dtype as before.
    return (f(X) >= 0.5).astype(int)


# Number of training iterations
epoch = 5000

# Learning rate
ETA = 1e-3

# Running update counter (1-based), used only in the progress output
count = 1

# Accuracy recorded after every parameter update
accuracies = []

# Training loop: each pass moves theta against (f(X) - train_y) · X, the
# gradient of the logistic-regression negative log-likelihood.
for _ in range(epoch):
    residual = f(X) - train_y
    theta = theta - ETA * residual.dot(X)
    # Fraction of training samples the updated theta classifies correctly
    result = classify(X) == train_y
    accuracy = np.count_nonzero(result) / len(result)
    accuracies.append(accuracy)
    print('第{}次更新，theta={}'.format(count, theta))
    count += 1

# Plot the standardized samples and the learned decision boundary.
is_positive = train_y == 1
is_negative = train_y == 0
x1 = np.linspace(-2, 2, 100)
# Solve theta[0] + theta[1] * x1 + theta[2] * y + theta[3] * x1**2 = 0 for y.
boundary = -(theta[3] * (x1 ** 2) + theta[1] * x1 + theta[0]) / theta[2]
plt.plot(train_z[is_positive, 0], train_z[is_positive, 1], 'o')
plt.plot(train_z[is_negative, 0], train_z[is_negative, 1], 'x')
plt.plot(x1, boundary, linestyle='dashed')
plt.axis('scaled')
plt.show()

# Plot the accuracy history against the update index
x = np.arange(len(accuracies))
plt.plot(x, accuracies)
plt.show()