Python机器学习1-吴恩达课后作业-线性回归分析ex1

BetterFlow_CFD

已于 2023-12-28 14:35:45 修改

阅读量463

点赞数 12

分类专栏：机器学习　文章标签：python 机器学习 开发语言

于 2023-12-26 10:32:07 首次发布

本文链接：https://blog.csdn.net/b_etter_man/article/details/135200024

版权

机器学习专栏收录该内容

6 篇文章 1 订阅

订阅专栏

1. 一元线性回归分析

分析对象：

人口、利润分析

计算程序：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def load_data(file_path):
    """Read the single-feature dataset into a DataFrame.

    The input is parsed with ``pandas.read_csv`` as having no header row,
    and its two columns are labelled ``'Population'`` and ``'Profit'``.

    Args:
        file_path: Location of the data file (anything ``read_csv`` accepts).

    Returns:
        The parsed ``DataFrame``.
    """
    column_labels = ['Population', 'Profit']
    return pd.read_csv(file_path, names=column_labels, header=None)


def gradient_descent(X_b, y, theta, eta, n_iterations):
    """Run batch gradient descent for least-squares linear regression.

    Args:
        X_b: Design matrix, one row per sample.
        y: Target values, one row per sample.
        theta: Starting parameter vector; it is not modified in place.
        eta: Learning rate applied to every update.
        n_iterations: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_history)`` holding the parameters after the
        last step and an array with the cost measured after each update.
    """
    sample_count = len(y)
    costs = np.zeros(n_iterations)

    for step in range(n_iterations):
        # Gradient of the half mean squared error at the current theta.
        residual = X_b @ theta - y
        step_gradient = 1 / sample_count * (X_b.T @ residual)
        theta = theta - eta * step_gradient

        # The recorded cost uses the freshly updated parameters.
        updated_residual = X_b @ theta - y
        costs[step] = np.mean(updated_residual ** 2) / 2

    return theta, costs


def plot_results(X, y, X_b, theta, cost_history):
    """Plot the fitted line over the data, then the cost curve.

    Args:
        X: Feature values used for the scatter plot's x axis.
        y: Observed target values.
        X_b: Design matrix; ``X_b.dot(theta)`` gives the fitted values.
        theta: Fitted parameter vector.
        cost_history: Cost recorded after each gradient-descent iteration.
    """
    # Raw samples with the fitted line drawn on top.
    plt.scatter(X, y)
    plt.plot(X, X_b.dot(theta), 'r-')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('Linear Regression with Gradient Descent')

    # Cost curve in a second figure. The x axis is derived from
    # cost_history itself so the function does not rely on a module-level
    # n_iterations variable and always matches the data being plotted.
    plt.figure()
    plt.plot(range(1, len(cost_history) + 1), cost_history, color='blue')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.title('Cost Function Over Iterations')

    plt.show()


if __name__ == '__main__':
    # Load the single-variable regression data (ex1data1.txt).
    # Forward slashes are used because they resolve on Windows as well as
    # on Linux/macOS, whereas a backslash path only works on Windows.
    path = './data/ex1data1.txt'
    data = load_data(path)

    # Build the column vectors and prepend a column of ones so that
    # theta[0] acts as the intercept term.
    X = data[['Population']].values
    y = data[['Profit']].values
    X_b = np.c_[np.ones((X.shape[0], 1)), X]

    # Random initial parameters and gradient-descent settings.
    theta = np.random.randn(2, 1)
    eta = 0.01  # learning rate
    n_iterations = 1000

    # Fit the parameters with gradient descent.
    theta, cost_history = gradient_descent(X_b, y, theta, eta, n_iterations)

    # Print the final parameters.
    print("最终参数(theta):", theta)

    # Plot the fit and the cost curve.
    plot_results(X, y, X_b, theta, cost_history)

运行结果

说明

解释：
- `import pandas as pd`：导入pandas库并用`pd`作为别名。
- `path = ...`：指定数据文件的路径（本例为 data 目录下的 ex1data1.txt，是以逗号分隔的文本文件，因此可以用 `read_csv` 读取）。请替换为你的实际文件路径。
- `pd.read_csv(path, header=None, names=['Population', 'Profit'])`：使用`read_csv`函数读取CSV文件。其中：
  - `header=None`：表示数据文件没有表头行。
  - `names=['Population', 'Profit']`：为DataFrame指定列名，用于标识每一列数据的含义。在这个例子中，第一列是'Population'（人口），第二列是'Profit'（利润）。

2. 多元线性回归分析

分析对象

面积、房间数、价格

计算程序

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def load_data(file_path):
    """Read the multi-feature dataset into a DataFrame.

    The input is parsed with ``pandas.read_csv`` as having no header row,
    and its three columns are labelled ``'Area'``, ``'Num'`` and ``'Price'``.

    Args:
        file_path: Location of the data file (anything ``read_csv`` accepts).

    Returns:
        The parsed ``DataFrame``.
    """
    column_labels = ['Area', 'Num', 'Price']
    return pd.read_csv(file_path, names=column_labels, header=None)


def gradient_descent(X_b, y, theta, eta, n_iterations):
    """Run batch gradient descent for least-squares linear regression.

    Args:
        X_b: Design matrix, one row per sample.
        y: Target values, one row per sample.
        theta: Starting parameter vector; it is not modified in place.
        eta: Learning rate applied to every update.
        n_iterations: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_history)`` holding the parameters after the
        last step and an array with the cost measured after each update.
    """
    sample_count = len(y)
    costs = np.zeros(n_iterations)

    for step in range(n_iterations):
        # Gradient of the half mean squared error at the current theta.
        residual = X_b @ theta - y
        step_gradient = 1 / sample_count * (X_b.T @ residual)
        theta = theta - eta * step_gradient

        # The recorded cost uses the freshly updated parameters.
        updated_residual = X_b @ theta - y
        costs[step] = np.mean(updated_residual ** 2) / 2

    return theta, costs


def plot_results(X, y, X_b, theta, cost_history):
    """Plot the cost recorded at each gradient-descent iteration.

    Args:
        X: Feature matrix (unused; kept so the call signature matches the
            single-feature script).
        y: Target values (unused).
        X_b: Design matrix (unused).
        theta: Fitted parameter vector (unused).
        cost_history: Cost recorded after each gradient-descent iteration.
    """
    # Only the cost curve is drawn here; the scatter/fit-line plot of the
    # single-feature script is not produced for this dataset.
    #
    # The x axis is derived from cost_history itself so the function does
    # not rely on a module-level n_iterations variable and always matches
    # the data being plotted.
    plt.figure()
    plt.plot(range(1, len(cost_history) + 1), cost_history, color='blue')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.title('Cost Function Over Iterations')

    plt.show()


if __name__ == '__main__':
    # Load the multi-variable regression data (ex1data2.txt).
    # Forward slashes are used because they resolve on Windows as well as
    # on Linux/macOS, whereas a backslash path only works on Windows.
    path = './data/ex1data2.txt'
    data = load_data(path)

    # Every column but the last is a feature; the last column is the target,
    # reshaped into a column vector.
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values.reshape(-1, 1)

    # Standardize each feature column with its OWN mean and standard
    # deviation (axis=0). Using the mean/std of the whole matrix would mix
    # the differently scaled columns and leave them on unequal scales.
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    # y is a single column, so the overall mean/std is already per-column.
    y = (y - y.mean()) / y.std()

    # Prepend a column of ones so that theta[0] acts as the intercept term.
    X_b = np.c_[np.ones((X.shape[0], 1)), X]

    # Random initial parameters and gradient-descent settings.
    theta = np.random.randn(X_b.shape[1], 1)
    eta = 0.01  # learning rate
    n_iterations = 3000

    # Fit the parameters with gradient descent.
    theta, cost_history = gradient_descent(X_b, y, theta, eta, n_iterations)

    # Print the final parameters.
    print("最终参数(theta):", theta)

    # Plot the cost curve.
    plot_results(X, y, X_b, theta, cost_history)