All of the code here is written in a fully vectorized style:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.metrics import classification_report
from scipy.optimize import minimize
from sklearn.preprocessing import OneHotEncoder
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def sigmoid_gradient(z):
return np.multiply(sigmoid(z), (1 - sigmoid(z)))
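# sanity check: sigmoid_gradient(0) == 0.25, since sigmoid(0) = 0.5 and 0.5 * (1 - 0.5) = 0.25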
def forward_propagate(X, theta1, theta2):
X = np.mat(X)
theta1 = np.mat(theta1)
theta2 = np.mat(theta2)
a1 = X
z2 = theta1 * X.T
a2 = np.insert(sigmoid(z2), 0, 1, axis=0)
z3 = theta2 * a2
h = sigmoid(z3)
    # each column of h is the prediction vector for one example
    # h = np.argmax(h, axis=0).T + 1
return z2, a2, z3, h
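# Shape walk-through (m = 5000 examples in this exercise):
# X: 5000*401, theta1: 25*401 -> z2 = theta1 * X.T: 25*5000
# a2 (bias row added): 26*5000, theta2: 10*26 -> z3: 10*5000 -> h: 10*5000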
# Vectorized cost function (learnRate is the regularization strength lambda, kept under its original name)
def cost(theta1, theta2, hidden_size, num_labels, X, y, learnRate):
X = np.mat(X)
y = np.mat(y)
theta1 = np.mat(theta1)
theta2 = np.mat(theta2)
z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # The two schemes below are only needed if data['y'] = [10,10,...,9,9,...,1] is passed in
    # raw, without one-hot encoding having been applied first
    # Scheme 1: start from a 10*5000 zero matrix and use a loop to set the right entry
    # of each column to 1, i.e. if y[i] = 2, the i-th column becomes [0,1,0,0,...,0]
# temp = np.mat(np.zeros((theta2.shape[0], X.shape[0])))
# for i in range(X.shape[0]):
# temp[y[i, 0] - 1, i] = 1
    # Scheme 2: encode with sklearn.preprocessing's OneHotEncoder
    # encoder = OneHotEncoder(sparse_output=False)  # on sklearn < 1.2 use sparse=False
    # y must be converted to an np.array here, otherwise sklearn emits a warning
    # y = np.array(y)
    # temp = encoder.fit_transform(y).T
    # print(temp.shape)
    # Computing the cost purely with matrix ops is cheaper than a for loop.
    # temp (or y here, which already arrives one-hot encoded) is 10*5000, and h,
    # the prediction matrix for X, is 10*5000 as well: multiply them elementwise
    # and sum every entry to get the first and second terms below
first = np.sum(np.multiply(-y, np.log(h)))
    # guard against log(0): add 1e-5 so the argument approximates but never reaches 0
second = np.sum(np.multiply(1 - y, np.log(1 - h + 1e-5)))
    # when regularizing, remember to skip every theta0 (bias) term; including them
    # changes little in practice, but the convention is to leave them out
reg = (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))) * learnRate / (2 * X.shape[0])
return (first - second) / X.shape[0] + reg
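# For reference: with the pre-trained weights from ex4weights.mat and learnRate = 1,
# this cost evaluates to roughly 0.384, the regularized value quoted in the ex4
# handout (the 1e-5 guard shifts it only marginally)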
# Backpropagation
def backprop(params, input_size, hidden_size, num_labels, X, y, learnRate):
X = np.mat(X)
y = np.mat(y)
    # params = np.mat(params).reshape(1, hidden_size * (input_size + 1) + num_labels * (hidden_size + 1))
    # unpack the flattened params vector back into theta1 and theta2
    # theta1 = np.mat(params[:, :hidden_size * (input_size + 1)].reshape(hidden_size, input_size + 1))
    # theta2 = np.mat(params[:, hidden_size * (input_size + 1):].reshape(num_labels, hidden_size + 1))
theta1 = np.mat(params[:hidden_size * (input_size + 1)].reshape(hidden_size, input_size + 1))
theta2 = np.mat(params[hidden_size * (input_size + 1):].reshape(num_labels, hidden_size + 1))
# print(theta1.shape, theta2.shape)
z2, a2, z3, h = forward_propagate(X, theta1, theta2)
z2 = np.mat(np.insert(z2, 0, 1, axis=0))
a2 = np.mat(a2)
z3 = np.mat(z3)
h = np.mat(h)
    # compute the cost, exactly as in cost() above
first = np.sum(np.multiply(-y, np.log(h)))
second = np.sum(np.multiply(1 - y, np.log(1 - h + 1e-5)))
reg = (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))) * learnRate / (2 * X.shape[0])
J = (first - second) / X.shape[0] + reg
    # backward pass
dt3 = h - y # 10*5000
dt2 = np.multiply(theta2.T * dt3, sigmoid_gradient(z2)) # 26*5000
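    # dt2 keeps the bias row for now (z2 had a row of ones inserted above);
    # it is dropped again below via dt2[1:, :]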
# print(dt3.shape, a2.shape)
    # first compute the raw gradients, then add the regularization term to the non-bias columns
delta1 = np.dot(dt2[1:, :], X) / X.shape[0]
delta1[:, 1:] += (theta1[:, 1:] * learnRate / X.shape[0])
delta2 = np.dot(dt3, a2.T) / X.shape[0]
delta2[:, 1:] += (theta2[:, 1:] * learnRate / X.shape[0])
# print(delta1.shape, delta2.shape)
grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
# print(grad.shape)
# print(params.shape)
return J, grad
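# Optional sanity check, a sketch that is not part of the original exercise flow:
# compare backprop's analytic gradient with a two-sided numerical estimate on a
# few randomly chosen parameters; eps = 1e-4 is the step suggested in the handout.
def gradient_check(params, input_size, hidden_size, num_labels, X, y, learnRate, n_checks=5, eps=1e-4):
    _, grad = backprop(params, input_size, hidden_size, num_labels, X, y, learnRate)
    for i in np.random.choice(params.shape[0], n_checks, replace=False):
        plus, minus = params.copy(), params.copy()
        plus[i] += eps
        minus[i] -= eps
        J_plus, _ = backprop(plus, input_size, hidden_size, num_labels, X, y, learnRate)
        J_minus, _ = backprop(minus, input_size, hidden_size, num_labels, X, y, learnRate)
        # the analytic and numerical gradients should agree to several decimal places
        print(i, (J_plus - J_minus) / (2 * eps), grad[i])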
if __name__ == '__main__':
data = loadmat('ex4data1.mat')
# print(data)
    # after the bias column is inserted, X is 5000*401; data['y'] is 5000*1
y_true = data['y']
    encoder = OneHotEncoder(sparse_output=False)  # on sklearn < 1.2 use sparse=False
X = np.mat(np.insert(data['X'], 0, 1, axis=1))
y = np.mat(encoder.fit_transform(data['y']).T)
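    # e.g. the label 3 becomes the column [0,0,1,0,0,0,0,0,0,0]^T, so y is 10*5000
    # and lines up column-for-column with h returned by forward_propagate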
# print(y)
# print(X.shape, y.shape)
weight = loadmat('ex4weights.mat')
    # theta1 is a 25*401 matrix, theta2 is a 10*26 matrix
theta1 = np.mat(weight['Theta1'])
theta2 = np.mat(weight['Theta2'])
# print(theta1.shape, theta2.shape)
# print(theta1, theta2)
    # display 100 randomly chosen sample images
# sample_index = np.random.choice(np.arange(X.shape[0]), 100, replace=False)
# sample = data['X'][sample_index, :]
# print(sample.shape)
    # draw the 100 samples in a 10*10 grid
# fig, ax = plt.subplots(nrows=10, ncols=10, sharex=True, sharey=True, figsize=(12, 12))
# for i in range(10):
# for j in range(10):
# ax[i, j].matshow(sample[10 * i + j].reshape(20, 20).T, cmap=plt.cm.binary)
# plt.xticks(np.array([]))
# plt.yticks(np.array([]))
# plt.show()
# z1 = theta1 * X.T
# a1 = np.insert(sigmoid(z1), 0, 1, axis=0)
# z2 = theta2 * a1
# a2 = sigmoid(z2)
# print(a2.shape)
# predict = np.argmax(a2, axis=0).T + 1
# print(predict.shape)
    # print(classification_report(y_true, predict))
hidden_size = 25
num_labels = 10
input_size = 400
    learnRate = 1  # regularization strength (lambda in the course notes)
# print(cost(theta1, theta2, hidden_size, num_labels, X, y, learnRate))
# print(sigmoid_gradient(0))
# a = np.mat([[1,2,3],
# [4,5,6],
# [1,2,1]])
# b = np.mat([[1],
# [2],
# [3]])
# print(np.multiply(a, b))
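    # initialize params uniformly in (-0.12, 0.12): 0.24 * (uniform - 0.5);
    # epsilon = 0.12 is the symmetry-breaking range suggested in the ex4 handout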
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.24
# print(params.shape[0])
    fmin = minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, X, y, learnRate),
                    method='TNC', jac=True, options={'maxiter': 250})
theta1_f = np.mat(fmin.x[: hidden_size * (input_size + 1)]).reshape(hidden_size, input_size + 1)
theta2_f = np.mat(fmin.x[hidden_size * (input_size + 1):]).reshape(num_labels, hidden_size + 1)
# print(theta1_f.shape, theta2_f.shape)
z2, a2, z3, h = forward_propagate(X, theta1_f, theta2_f)
# print(h.shape)
y_pred = np.argmax(h, axis=0).T + 1
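    # +1 because row index 0 of h corresponds to class 1; in this dataset the digit 0 is stored as class 10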
print(classification_report(y_true, y_pred))
One thing to note: the x0 argument of minimize must be a single vector, so the network's two weight matrices theta1 and theta2 have to be unrolled into vectors and concatenated before they can be passed in, as the sketch below illustrates.
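A minimal sketch of that unroll/reshape round trip (shapes taken from this exercise; the variable names are illustrative):

import numpy as np

theta1 = np.random.random((25, 401))  # hidden-layer weights
theta2 = np.random.random((10, 26))   # output-layer weights
# flatten both matrices and concatenate into the single vector minimize expects as x0
params = np.concatenate((np.ravel(theta1), np.ravel(theta2)))
print(params.shape)  # (10285,) = 25*401 + 10*26
# inside backprop (and again after optimization) the vector is cut and reshaped back
t1 = params[:25 * 401].reshape(25, 401)
t2 = params[25 * 401:].reshape(10, 26)
assert (t1 == theta1).all() and (t2 == theta2).all()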