吴恩达机器学习python实现4 神经网络

Scarlett1007

已于 2022-03-20 15:12:54 修改

阅读量1.3k

点赞数

文章标签： python 机器学习

于 2022-03-20 15:11:25 首次发布

本文链接：https://blog.csdn.net/weixin_59057086/article/details/123613401

版权

1、参数数组的展平（多维→一维）与还原（一维→多维）

def serializer(thetas):
    """Flatten several parameter arrays into a single 1-D vector.

    Every array in ``thetas`` is raveled and the pieces are joined in the
    order given. A leading integer placeholder takes part in the
    concatenation (so it influences dtype promotion and lets an empty
    ``thetas`` work) and is sliced off before returning.
    """
    pieces = [np.array([0])]
    pieces.extend(t.ravel() for t in thetas)
    return np.concatenate(pieces, axis=0)[1:]


def deserialize(theta, shapes=((25, 401), (10, 26))):
    """Split a flat parameter vector back into per-layer weight matrices.

    Args:
        theta: array holding every weight matrix, raveled and joined in
            layer order (the layout produced by ``serializer``).
        shapes: ``(rows, cols)`` of each matrix, in order. Defaults to the
            25x401 and 10x26 matrices this function originally hard-coded.

    Returns:
        A tuple with one reshaped array per entry of ``shapes``.

    Raises:
        ValueError: if ``theta`` does not hold exactly as many elements as
            ``shapes`` requires.
    """
    theta = np.asarray(theta)
    sizes = [rows * cols for rows, cols in shapes]
    expected = sum(sizes)
    if theta.size != expected:
        raise ValueError(
            f"theta has {theta.size} elements, but shapes {tuple(shapes)} "
            f"require {expected}"
        )

    matrices = []
    start = 0
    for size, shape in zip(sizes, shapes):
        # Consecutive slices: each matrix starts where the previous one ended.
        matrices.append(theta[start:start + size].reshape(shape))
        start += size
    return tuple(matrices)

2、将y转换为one-hot编码（向量化）

def convert(y):
    """One-hot encode a column of integer labels.

    Args:
        y: array of labels, one per row; only the first column is used.

    Returns:
        A float array of shape ``(len(y), n)`` where ``n`` is the number of
        distinct values in ``y``. Row ``i`` has a single 1 in column
        ``label_i % 10`` (so label 10 lands in column 0). An empty ``y``
        yields an empty ``(0, 0)`` array.

    Raises:
        IndexError: if some ``label % 10`` is not smaller than ``n``.
    """
    y = np.asarray(y)
    n = len(np.unique(y))
    m = y.shape[0]
    res = np.zeros((m, n))
    if m == 0:
        return res
    labels = y.reshape(m, -1)[:, 0].astype(int) % 10
    # Set every row's hot column in one indexed assignment instead of
    # concatenating one row at a time.
    res[np.arange(m), labels] = 1
    return res

3、前向传播并计算cost

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of ``z``, element-wise."""
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)


def feedforword(thetas, X):
    """Run forward propagation through every layer.

    Args:
        thetas: flat parameter vector; split into per-layer weight matrices
            by ``deserialize``.
        X: 2-D input array, one sample per row, without a bias column.

    Returns:
        ``(A, Z)``. ``A`` lists the bias-augmented input of each layer,
        followed by the final activation (which gets no bias column).
        ``Z`` lists each layer's pre-activation values.
    """
    activations, pre_activations = [], []
    current = X
    for weights in deserialize(thetas):
        # Prepend the bias unit (a column of ones) before applying the layer.
        with_bias = np.insert(current, 0, 1, axis=1)
        linear = with_bias.dot(weights.T)
        activations.append(with_bias)
        pre_activations.append(linear)
        current = sigmoid(linear)
    activations.append(current)
    return activations, pre_activations


def not_regularized_cost(thetas, X, y):
    """Return the unregularized cross-entropy cost of the network.

    Args:
        thetas: flat parameter vector understood by ``deserialize``.
        X: 2-D input array, one sample per row, without a bias column.
        y: target array with the same shape as the network output.

    Returns:
        The per-sample cross-entropy (summed over output units), averaged
        over all samples.
    """
    # The last entry of the activation list is the network output.
    output = feedforword(thetas, X)[0][-1]
    per_sample = np.sum(-y * np.log(output) - (1 - y) * np.log(1 - output), axis=1)
    return np.mean(per_sample)


def regularized_cost(thetas, X, y, lamda):
    """Return the cross-entropy cost plus an L2 penalty on the weights.

    Args:
        thetas: flat parameter vector understood by ``deserialize``.
        X: 2-D input array, one sample per row, without a bias column.
        y: target array with the same shape as the network output.
        lamda: regularization strength.

    Returns:
        The mean cross-entropy plus ``lamda / (2 * m)`` times the sum of
        squared non-bias weights, where ``m`` is the number of samples.
    """
    m = X.shape[0]
    penalty = 0
    activation = X
    for weights in deserialize(thetas):
        with_bias = np.insert(activation, 0, 1, axis=1)
        activation = sigmoid(with_bias.dot(weights.T))
        # The bias column (first column) is not regularized.
        no_bias = weights[:, 1:]
        penalty += (lamda / (2 * m)) * np.sum(no_bias * no_bias)
    per_sample = np.sum(
        -y * np.log(activation) - (1 - y) * np.log(1 - activation), axis=1
    )
    data_term = np.mean(per_sample)
    return data_term + penalty

输出结果

    # Load the weights and the data set. The identifier 文件路径 ("file path")
    # is a placeholder to be replaced with the actual .mat file locations.
    theta = sio.loadmat(文件路径)
    data = sio.loadmat(文件路径)
    y = convert(data['y'])
    # The training set orders the labels as 1 2 3 ... 0,
    # whereas convert() produces columns 0 1 2 ... 9,
    # so column 0 is moved to the end to match.
    y0 = y[:, 0].reshape(y.shape[0], 1)
    y = np.concatenate((y[:, 1:], y0), axis=1)  # (5000,10)
    X = data["X"]  # (5000,400)
    theta1 = theta["Theta1"]  # shape(25,401)
    theta2 = theta["Theta2"]  # shape(10,26)
    theta = serializer((theta1, theta2))  # 1-D array
    # Manual forward pass, used to cross-check the cost functions below.
    a1 = X
    a1 = np.insert(a1, 0, 1, axis=1)  # (5000,401)
    a2 = sigmoid(a1.dot(theta1.T))  # (5000,25)
    a2 = np.insert(a2, 0, 1, axis=1)  # (5000,26)
    a3 = sigmoid(a2.dot(theta2.T))  # (5000,10)
    # NOTE(review): the result `a` is not used in the lines shown here.
    a = feedforword(theta, X)
    cost = np.mean(np.sum((-y)*np.log(a3)-(1-y)*np.log(1-a3), axis=1))
    print(cost)  # 0.2876291651613189
    print(not_regularized_cost(theta, X, y))  # 0.2876291651613189
    print(regularized_cost(theta, X, y, 1))  # 0.38376985909092365

4、反向传播

def sigmoid_gradient(z):
    """Return the derivative of the sigmoid at ``z``: s(z) * (1 - s(z))."""
    s = sigmoid(z)
    return s * (1 - s)


def random_initialize_weights(shape, e=0.12):
    """Return a random weight matrix with entries drawn from [-e, e).

    Args:
        shape: sequence whose first two items are the row and column counts.
        e: half-width of the sampling interval.
    """
    rows, cols = shape[0], shape[1]
    # np.random.rand samples [0, 1); shift to [-0.5, 0.5) and then scale.
    centered = np.random.rand(rows, cols) - 0.5
    return centered * 2 * e


def back(thetas, X, y, lamda):
    """Compute the gradient of the regularized cost by backpropagation.

    Args:
        thetas: flat parameter vector for the two-layer network, in the
            layout understood by ``deserialize``.
        X: 2-D input array, one sample per row, without a bias column.
        y: target array with the same shape as the network output.
        lamda: regularization strength.

    Returns:
        The gradients of both weight matrices, flattened by ``serializer``
        into the same layout as ``thetas``.
    """
    A, Z = feedforword(thetas, X)
    a1, a2, a3 = A  # a1 and a2 carry the bias column, a3 is the output
    z2, _ = Z
    # Use the weights being evaluated (the `thetas` argument), not a
    # module-level variable, so the gradient matches the current point.
    theta1, theta2 = deserialize(thetas)
    m = X.shape[0]

    d3 = a3 - y
    # Drop the bias column of the back-propagated error before applying the
    # hidden layer's sigmoid derivative.
    d2 = d3.dot(theta2)[:, 1:] * sigmoid_gradient(z2)

    # Regularization skips the bias weights: zero the first column on copies,
    # leaving the caller's parameter vector untouched.
    reg1 = theta1.copy()
    reg1[:, 0] = 0
    reg2 = theta2.copy()
    reg2[:, 0] = 0

    D1 = (1 / m) * d2.T.dot(a1) + (lamda / m) * reg1
    D2 = (1 / m) * d3.T.dot(a2) + (lamda / m) * reg2
    return serializer((D1, D2))

代入数据

    # Sanity checks for the two helpers.
    print(sigmoid_gradient(0))  # 0.25
    print(random_initialize_weights((2, 2)))
    # One-hot targets straight from convert(); no column reordering is
    # applied in this fragment.
    y = convert(data['y'])
    # Start from small random weights and flatten them for the optimizer.
    theta1 = random_initialize_weights((25, 401))
    theta2 = random_initialize_weights((10, 26))
    theta = serializer((theta1, theta2))
    # Minimize the regularized cost (lamda = 1), with back() supplying the gradient.
    res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y, 1), method="TNC", jac=back)
    print(res)
    print(res.x.shape)  # 10285
    # Restore the matrices from the optimized vector and save them to disk.
    theta1, theta2 = deserialize(res.x)
    sio.savemat("parametersWeights.mat", {"theta1": theta1, "theta2": theta2})

得出结果

0.25
[[ 0.02190042 -0.01471704]
 [ 0.09290001  0.02323642]]
     fun: 0.635188914309726
     jac: array([-5.57739695e-05, -1.65921829e-05,  1.77331961e-05, ...,
        2.06364816e-04, -7.09894744e-04, -6.71933834e-04])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 221
     nit: 15
  status: 1
 success: True
       x: array([ 0.14616376,  0.01840686, -0.02081726, ..., -5.62499118,
        2.55922323, -3.19727727])
(10285,)

5、预测

def predict(thetas, X):
    """Predict one-hot class rows with the trained parameters.

    Args:
        thetas: flat parameter vector understood by ``feedforword``.
        X: 2-D input array, one sample per row, without a bias column.

    Returns:
        A float array with the same shape as the network output; each row
        has a single 1 at the position of that sample's largest activation.
    """
    a3 = feedforword(thetas, X)[0][-1]
    p = np.zeros(a3.shape)
    # Mark the winning unit of every sample in one indexed assignment rather
    # than concatenating one row at a time.
    p[np.arange(a3.shape[0]), np.argmax(a3, axis=1)] = 1
    return p

# Print a per-class report comparing the one-hot predictions with y.
# NOTE(review): classification_report is presumably sklearn.metrics' — confirm the import.
print(classification_report(y, predict(res.x, X)))

输出结果

 precision    recall  f1-score   support

           0       0.96      0.98      0.97       500
           1       0.96      0.96      0.96       500
           2       0.93      0.92      0.93       500
           3       0.93      0.90      0.92       500
           4       0.95      0.96      0.96       500
           5       0.93      0.92      0.93       500
           6       0.96      0.97      0.97       500
           7       0.94      0.92      0.93       500
           8       0.92      0.94      0.93       500
           9       0.94      0.93      0.93       500

   micro avg       0.94      0.94      0.94      5000
   macro avg       0.94      0.94      0.94      5000
weighted avg       0.94      0.94      0.94      5000
 samples avg       0.94      0.94      0.94      5000

6、可视化

def mapping(images, images_dimension):
    """Tile several flattened square images into one 2-D image.

    Args:
        images: 2-D array, one flattened square image per row.
        images_dimension: number of images placed side by side in each row
            of the resulting grid.

    Returns:
        A 2-D array with the images laid out row by row. Inputs that fit in
        a single grid row are returned as that one row.

    Raises:
        ValueError: if ``images`` is empty, or if the last grid row is
            narrower than the preceding ones.
    """
    side = int(np.sqrt(images.shape[-1]))
    tiles = [img.reshape(side, side) for img in images]
    if not tiles:
        raise ValueError("mapping() needs at least one image")
    # Join each group of `images_dimension` tiles horizontally, then stack
    # the resulting rows vertically.
    rows = [
        np.concatenate(tiles[start:start + images_dimension], axis=1)
        for start in range(0, len(tiles), images_dimension)
    ]
    return np.concatenate(rows, axis=0)


def visualizing_the_hidden_layer(theta, X):
    """Plot the hidden layer's input and output for the first 25 samples.

    Runs ``feedforword``, strips the bias column from the first two
    activations, tiles the first 25 rows of each into a 5-per-row grid with
    ``mapping``, and shows the two grids side by side.
    """
    activations, _ = feedforword(theta, X)
    a1, a2, _a3 = activations
    hidden_in = mapping(a1[..., 1:][:25], 5)
    hidden_out = mapping(a2[..., 1:][:25], 5)
    # The input grid is drawn transposed; the output grid is drawn as-is.
    panels = (
        (1, hidden_in.T, "hidden layer input"),
        (2, hidden_out, "hidden layer output"),
    )
    for position, picture, caption in panels:
        plt.subplot(1, 2, position)
        plt.axis("off")
        plt.imshow(picture)
        plt.title(caption)
    plt.show()