import numpy as np
import matplotlib.pyplot as plt
# Dataset: X holds the features, y holds the labels (1 = good melon, 0 = bad melon)
X = np.array([[0.697, 0.460],
              [0.774, 0.376],
              [0.634, 0.264],
              [0.608, 0.318],
              [0.556, 0.215],
              [0.403, 0.237],
              [0.481, 0.149],
              [0.437, 0.211],
              [0.666, 0.091],
              [0.243, 0.267],
              [0.245, 0.057],
              [0.343, 0.099],
              [0.639, 0.161],
              [0.657, 0.198],
              [0.360, 0.370],
              [0.593, 0.042],
              [0.719, 0.103]])
y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Add a bias term by prepending a column of ones to X, which simplifies the matrix algebra
X_with_bias = np.c_[np.ones((X.shape[0], 1)), X]
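# Quick shape check (a minimal sketch, not in the original post): prepending
# the bias column turns the (17, 2) feature matrix into a (17, 3) design
# matrix, so theta[0] will act as the intercept.
assert X_with_bias.shape == (X.shape[0], X.shape[1] + 1)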
# Sigmoid (logistic) activation
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
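# A numerically safer variant (an optional sketch, not used above): clipping z
# keeps np.exp(-z) from overflowing for inputs of large magnitude.
def sigmoid_stable(z):
    return 1 / (1 + np.exp(-np.clip(z, -500, 500)))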
# Binary cross-entropy loss
def compute_loss(X, y, theta):
    m = len(y)
    h = sigmoid(X.dot(theta))
    epsilon = 1e-5  # avoid log(0)
    loss = -1 / m * np.sum(y * np.log(h + epsilon) + (1 - y) * np.log(1 - h + epsilon))
    return loss
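# For reference, the expression above is the average binary cross-entropy,
#   J(theta) = -(1/m) * sum_i [ y_i*log(h_i) + (1 - y_i)*log(1 - h_i) ],
# with h_i = sigmoid(x_i . theta); epsilon only guards against log(0).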
# Gradient descent: repeatedly update the model parameters to minimize the loss
def gradient_descent(X, y, learning_rate=0.01, epochs=10000):
    m, n = X.shape
    theta = np.zeros(n)
    loss_history = []
    for epoch in range(epochs):
        h = sigmoid(X.dot(theta))
        gradient = 1 / m * X.T.dot(h - y)
        theta -= learning_rate * gradient
        loss = compute_loss(X, y, theta)
        loss_history.append(loss)
    return theta, loss_history
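# Note: the update above uses the analytic gradient of the cross-entropy loss,
#   grad J(theta) = (1/m) * X^T (sigmoid(X theta) - y),
# so each epoch performs one full-batch gradient step over all 17 samples.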
# Train the model with gradient descent
theta, loss_history = gradient_descent(X_with_bias, y)
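# A minimal evaluation sketch (the names below are illustrative additions, not
# from the original post): threshold the fitted probabilities at 0.5 and
# report training accuracy and the final loss.
probs = sigmoid(X_with_bias.dot(theta))
preds = (probs >= 0.5).astype(int)
print("training accuracy:", np.mean(preds == y))
print("final loss:", loss_history[-1])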
# Visualize the data and the decision boundary
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', marker='o', edgecolor='k')
plt.xlabel("Density")
plt.ylabel("Sugar content")
# Plot the decision boundary, i.e. the line where theta[0] + theta[1]*x1 + theta[2]*x2 = 0
x_values = [np.min(X[:, 0]), np.max(X[:, 0])]
y_values = [-(theta[0] + theta[1] * x) / theta[2] for x in x_values]
plt.plot(x_values, y_values, color='red', linestyle='dashed', linewidth=2)
plt.title("Decision Boundary")
plt.show()
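# Optionally (a sketch assuming the gradient_descent call above), plot the
# recorded loss history to confirm that training converged.
plt.figure()
plt.plot(loss_history)
plt.xlabel("Epoch")
plt.ylabel("Cross-entropy loss")
plt.title("Training Loss")
plt.show()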