吴恩达机器学习作业Python实现(一)：线性回归

最新推荐文章于 2022-12-04 21:00:42 发布

DaYinYi

最新推荐文章于 2022-12-04 21:00:42 发布

阅读量392

点赞数 1

文章标签：机器学习

本文链接：https://blog.csdn.net/qq_36998053/article/details/109956751

版权

人工智能专栏收录该内容

17 篇文章 11 订阅

订阅专栏

参考：https://blog.csdn.net/Cowry5/article/details/80174130

我在原文的基础上补充了一些内容。（为了练习独立敲代码，我把照着原文写的代码搬到了 CSDN；如果大家想学习，建议还是直接阅读上面链接中的原文。）

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the data set
path = 'I:\\吴恩达机器学习\\test1\\数据集\\ex1data1.txt'
# names supplies the column names; header selects the row used as the header and
# is set to None here because the names are given explicitly
# (assumes the file itself has no header row -- TODO confirm against the data file)
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
# head(n) returns the first n rows; n defaults to 5
# NOTE: the result is not printed or stored, so this line shows nothing when run as a plain script
data.head()
# describe() summarises the data: count of non-null values, mean, std, min, max and the 25%/50%/75% quantiles
# NOTE: result is discarded here as well
data.describe()

# Scatter plot of Profit against Population
data.plot(kind='scatter', x='Population', y='Profit', figsize=(8, 5))
plt.show()


# Compute the cost function
def computeCost(X, y, theta):
    """Return the squared-error cost of a linear model.

    Computes ``J(theta) = sum((X @ theta.T - y) ** 2) / (2 * m)`` where ``m``
    is the number of rows of ``X``.

    The inputs are converted to plain float ndarrays and the product is taken
    with ``@``, so the result no longer depends on ``np.matrix`` giving ``*``
    matrix-product semantics: ndarrays, ``np.matrix`` objects and nested lists
    all produce the same value.

    Args:
        X: 2-D array-like of shape (m, n), one training example per row.
        y: Targets, either shape (m, 1) or a flat sequence of length m.
        theta: Parameters, either shape (1, n) or a flat sequence of length n.

    Returns:
        The cost as a NumPy float scalar.
    """
    X = np.asarray(X, dtype=float)
    # Force a column of targets and a row of parameters so that the
    # subtraction below can never broadcast into an (m, m) array.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(1, -1)

    residual = X @ theta.T - y  # (m, 1) prediction errors
    return np.sum(np.square(residual)) / (2 * X.shape[0])


# insert() adds a new column at position 0, named 'Ones', filled with the value 1.
# This constant column pairs with theta[0, 0], which is used as the intercept
# when the fitted line is plotted further down.
data.insert(0, 'Ones', 1)
print(data.head())


# Variable initialisation: set X (training data) and y (target variable)
cols = data.shape[1]  # number of columns in data: 3 once 'Ones' has been inserted
X = data.iloc[:, 0:cols - 1]  # all columns except the last, i.e. the inputs ('Ones', 'Population'), slice 0:2
y = data.iloc[:, cols - 1: cols]  # the last column only, i.e. the target ('Profit'), slice 2:3
# print(cols)

X.head()  # look at the first five rows (0 to 4); result is discarded in a plain script
y.head()


# Convert to np.matrix so that `*` between X and theta.T is a matrix product
X = np.matrix(X.values)   # print(X.shape) gave (97, 2) for the author's data set
y = np.matrix(y.values)
theta = np.matrix([0, 0])   # print(theta)   [[0  0]]


# Shape of a nested-list array with one row and two columns.
# NOTE: these bare expressions only display a value in an interactive session;
# in a plain script their results are discarded.
np.array([[0, 0]]).shape
# (1,2)


# Shapes of the inputs, parameters and targets (values below are the author's recorded output)
X.shape, theta.shape, y.shape
# ((97,2), (1,2), (97,1))


# Compute the cost for the initial theta
computeCost(X, y, theta)


# Same shapes again, plus the number of rows of X
X.shape, theta.shape, y.shape, X.shape[0]


def gradientDescent(X, y, theta, alpha, epoch):  # the script calls this with alpha = 0.01, epoch = 1000
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration applies the vectorised simultaneous update

        theta := theta - (alpha / m) * (X theta^T - y)^T X

    and then records the cost of the updated parameters via ``computeCost``.
    The product is written with ``@`` so the update itself is a matrix product
    for both 2-D ndarrays and ``np.matrix`` inputs; the container type of
    ``theta`` is left as NumPy produces it (an ``np.matrix`` stays a matrix).

    Args:
        X: 2-D array or matrix of shape (m, n), one training example per row.
        y: Targets of shape (m, 1).
        theta: Initial parameters of shape (1, n). The caller's object is not
            modified; the name is only rebound locally.
        alpha: Learning rate.
        epoch: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after ``epoch`` updates and
        a 1-D ndarray of length ``epoch`` whose i-th entry is the cost after
        the (i + 1)-th update.
    """
    m = X.shape[0]  # number of training examples
    cost = np.zeros(epoch)  # one recorded cost per iteration

    for i in range(epoch):
        residual = X @ theta.T - y  # (m, 1) prediction errors
        # Scale first, then multiply by X: same evaluation order as the
        # original left-to-right expression, so results are unchanged.
        theta = theta - (alpha / m) * residual.T @ X
        cost[i] = computeCost(X, y, theta)

    return theta, cost


# Initialise the additional variables: the learning rate alpha and the number of iterations to run
alpha = 0.01
epoch = 1000

# Run gradient descent to fit theta
final_theta, cost = gradientDescent(X, y, theta, alpha, epoch)


# Compute the cost (error) of the trained model using the fitted parameters
# NOTE: the result is not printed or stored, so it is discarded in a plain script
computeCost(X, y, final_theta)

# Plot the fitted line together with the data to inspect the fit visually
# np.linspace() returns evenly spaced numbers over the given interval
x = np.linspace(data['Population'].min(), data['Population'].max(), 100)  # x-axis: population
f = final_theta[0, 0] + (final_theta[0, 1] * x)  # y-axis: predicted profit (intercept + slope * x)


fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x, f, 'r', label='Prediction')
# Legend label typo fixed: 'Traning Data' -> 'Training Data'
ax.scatter(data['Population'], data['Profit'], label='Training Data')
ax.legend(loc=2)  # loc=2 places the legend in the upper-left corner
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()


# Plot the cost recorded at every training iteration
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(np.arange(epoch), cost, 'r')  # np.arange(epoch) yields 0, 1, ..., epoch - 1
# Set both axis labels and the title in a single call
ax.set(xlabel='Iterations', ylabel='Cost', title='Error vs. Training Epoch')
plt.show()

DaYinYi

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
1
评论
吴恩达机器学习作业Python实现(一)：线性回归

参考：https://blog.csdn.net/Cowry5/article/details/80174130自己补充了一些：import numpy as npimport pandas as pdimport matplotlib.pyplot as plt# 读取数据path = 'I:\\吴恩达机器学习\\test1\\数据集\\ex1data1.txt'# names 添加列名，header 用指定的行为作为标题，若原无标题且指定标题则设为Nonedata = pd.re
复制链接

扫一扫

专栏目录