逻辑回归：使用优化函数求解 theta

高手高手高高手*

已于 2023-09-18 11:35:46 修改

阅读量83

点赞数

分类专栏：机器学习　文章标签：线性回归 python 算法

于 2023-09-17 14:59:01 首次发布

本文链接：https://blog.csdn.net/2301_79652490/article/details/132942933

版权

机器学习专栏收录该内容

3 篇文章 0 订阅

订阅专栏

本文介绍了如何使用Python中的numpy、pandas、matplotlib和scipy库实现逻辑回归模型，包括数据预处理、代价函数与梯度的计算、使用scipy.optimize.fmin_tnc求解参数theta，以及决策边界的绘制。最后展示了如何预测学生的录取概率并评估模型在训练集上的准确率。

摘要由CSDN通过智能技术生成

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the training set. The file has no header row, so the three columns
# are named here: two exam scores and the admission label.
path = "E:\\study_data\\test1.txt"
data = pd.read_csv(
    path,
    header=None,
    names=['Exam 1', 'Exam 2', 'Admitted'],
)

# Split the rows by label (1 vs 0) so each class can be plotted with its
# own marker later on.
positive = data.loc[data['Admitted'].isin([1])]
negative = data.loc[data['Admitted'].isin([0])]


# Disabled preview scatter plot of the raw training set:
# fig, ax = plt.subplots(figsize=(12, 8))
# ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')
# ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
# ax.legend()
# ax.set_xlabel('Exam 1 Score')
# ax.set_ylabel('Exam 2 Score')
# plt.show()


# Logistic sigmoid function
def sigmoid(z):
    """Return ``1 / (1 + exp(-z))``.

    ``np.exp`` is applied elementwise, so ``z`` may be a scalar or a NumPy
    array/matrix; the result has the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator


# Logistic-regression cost function
def cost(theta, X, y):
    """Return the mean cross-entropy cost of a logistic-regression model.

    The value is ``sum(-y*log(h) - (1-y)*log(1-h)) / len(X)`` with
    ``h = 1 / (1 + exp(-X @ theta))``.

    Args:
        theta: Parameter vector with one entry per column of ``X``. Any
            array-like (including a 1xN matrix) is flattened to 1-D.
        X: Design matrix with one sample per row. A single 1-D sample is
            treated as one row.
        y: Labels, one per row of ``X``. A flat vector and an (m, 1)
            column are both accepted and give the same result.

    Returns:
        The scalar cost as a NumPy float.
    """
    # Work on plain ndarrays with explicit shapes. Flattening ``y`` keeps it
    # aligned with the 1-D product ``X @ theta`` instead of broadcasting
    # into an (m, m) grid.
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.atleast_2d(np.asarray(X, dtype=float))
    y = np.asarray(y, dtype=float).ravel()

    z = X @ theta
    # -log(h) == logaddexp(0, -z) and -log(1 - h) == logaddexp(0, z).
    # Evaluating them this way stays finite when the sigmoid saturates,
    # where log(0) would otherwise turn the cost into nan/inf.
    first = y * np.logaddexp(0, -z)
    second = (1 - y) * np.logaddexp(0, z)
    return np.sum(first + second) / len(X)


# Build the design matrix, the label column and the starting parameters.
# A constant column of ones is prepended so theta[0] acts as the intercept.
data.insert(loc=0, column='ones', value=1)
cols = data.shape[1]
# Every column except the last is a feature; the last column is the label
# (kept 2-D, one row per sample).
X = np.array(data.iloc[:, :cols - 1])
y = np.array(data.iloc[:, cols - 1:])
# One parameter per feature column: 'ones', 'Exam 1', 'Exam 2'.
theta = np.zeros(3)

# Cost when every parameter is zero
print(cost(theta, X, y))


# Gradient of the cost. Despite its name this function does not update
# theta itself; a library optimizer consumes the gradient and does that.
def gradientDescent(theta, X, y):
    """Return the gradient of the logistic-regression cost at ``theta``.

    Args:
        theta: Parameter vector with one entry per column of ``X``. Any
            array-like (including a 1xN matrix) is flattened to 1-D.
        X: Design matrix with one sample per row. A single 1-D sample is
            treated as one row.
        y: Labels, one per row of ``X``. A flat vector and an (m, 1)
            column are both accepted.

    Returns:
        A 1-D float array whose i-th entry is
        ``sum((sigmoid(X @ theta) - y) * X[:, i]) / len(X)``.
    """
    # Plain ndarrays with explicit shapes: flattening ``y`` keeps the
    # residual 1-D instead of letting a row/column mismatch broadcast.
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.atleast_2d(np.asarray(X, dtype=float))
    y = np.asarray(y, dtype=float).ravel()

    error = np.asarray(sigmoid(X @ theta)).ravel() - y
    # X.T @ error computes every partial derivative in one product; each
    # entry weights the residuals by a different feature column.
    return X.T @ error / len(X)


# Fit theta with a library optimizer
import scipy.optimize as opt

# func is the function to minimise, x0 the initial theta, fprime the
# hand-written gradient function, and args the extra arguments (X, y)
# passed to both. fmin_tnc returns three values: the solution, the number
# of function evaluations and a return code.
result = opt.fmin_tnc(
    func=cost,
    x0=theta,
    fprime=gradientDescent,
    args=(X, y),
)
print(result)

# Plug the fitted theta back into the cost function
print(cost(result[0], X, y))

# Draw the decision boundary on top of the training data.
# The boundary is the line where X * theta.T = 0, i.e.
# theta0 + theta1 * x1 + theta2 * x2 = 0 solved for x2.
plotting_x = np.linspace(30, 100, num=100)
plotting_y = (-result[0][0] - result[0][1] * plotting_x) / result[0][2]

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(plotting_x, plotting_y, 'y', label='Prediction')
ax.scatter(
    positive['Exam 1'], positive['Exam 2'],
    s=50, c='b', marker='o', label='Admitted',
)
ax.scatter(
    negative['Exam 1'], negative['Exam 2'],
    s=50, c='r', marker='x', label='Not Admitted',
)
ax.set(xlabel='Exam 1 Score', ylabel='Exam 2 Score')
ax.legend()
plt.show()


# Hypothesis function used to evaluate the fitted model
def hfunc1(theta, X):
    """Return ``sigmoid(np.dot(theta.T, X))``.

    ``theta`` must expose ``.T`` (e.g. a NumPy array); ``X`` is the input
    combined with it through ``np.dot``.
    """
    weighted_sum = np.dot(theta.T, X)
    return sigmoid(weighted_sum)


# Admission probability for a student who scores 45 on exam 1 and 85 on
# exam 2. The leading 1 matches the constant 'ones' column of the design matrix.
print(hfunc1(result[0], [1, 45, 85]))


# Prediction function
def predict(theta, X):
    """Classify every row of ``X`` as 1 or 0 using a 0.5 probability cut-off.

    Args:
        theta: Parameter vector with one entry per column of ``X``. A 1-D
            array, a list or a 1xN matrix are all accepted.
        X: Design matrix with one sample per row. A single 1-D sample is
            treated as one row.

    Returns:
        A list with one int per row: 1 where ``sigmoid(X @ theta) >= 0.5``,
        otherwise 0.
    """
    # Use an explicit matrix product on plain ndarrays. ``X * theta.T`` is
    # a matrix product only when an operand is ``np.matrix``; with ordinary
    # arrays it broadcasts elementwise and the comparison below fails.
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.atleast_2d(np.asarray(X, dtype=float))
    probability = np.asarray(sigmoid(X @ theta)).ravel()
    # X may hold many samples, so there is one probability per row.
    return [1 if p >= 0.5 else 0 for p in probability]


# Measure how often the model's predictions match the training labels
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
# Pair each prediction with its true label: 1 marks a match, 0 a miss.
# Predictions are only ever 0 or 1, so a match is simply equality.
correct = [1 if a == b else 0 for a, b in zip(predictions, y)]
# The sum of the flags is the number of correctly classified samples.
accuracy = sum(correct) / len(correct)
print(f"{accuracy:.2f}")