import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt

# Training data for the toy network: a single sample.
# (The original file defined x/y *before* importing numpy — a NameError —
# and then defined them a second time after the imports.)
x = np.array([[1, 1]])  # network input (1 sample, 2 features)
y = np.array([[0]])     # ground-truth output
def feed_forward(inputs, outputs, weights):
    """Run one forward pass through the 2-layer network and return the MSE loss.

    Parameters
    ----------
    inputs : ndarray, shape (n_samples, n_features)
        Network input batch.
    outputs : ndarray
        Ground-truth targets, broadcastable against the network output.
    weights : sequence of ndarray
        [hidden_weights, hidden_bias, output_weights, output_bias].

    Returns
    -------
    float
        Mean squared error between the network output and ``outputs``.
    """
    pre_hidden = np.dot(inputs, weights[0]) + weights[1]
    # Sigmoid activation, applied elementwise.
    hidden = 1 / (1 + np.exp(-pre_hidden))
    # Output layer is linear (no activation).
    out = np.dot(hidden, weights[2]) + weights[3]
    return np.mean(np.square(out - outputs))
def update_weights(inputs, outputs, weights, lr):
    """Update every network parameter once via numerical gradient descent.

    The gradient of the loss w.r.t. each scalar parameter is estimated from
    the definition of the derivative (forward difference with step 1e-4),
    not via backpropagation / the chain rule.

    Parameters
    ----------
    inputs, outputs : ndarray
        Training batch and its ground-truth targets.
    weights : list of ndarray
        [hidden_weights, hidden_bias, output_weights, output_bias].
    lr : float
        Learning rate.

    Returns
    -------
    tuple (list of ndarray, float)
        The updated weights and the loss measured *before* the update.
    """
    eps = 0.0001  # finite-difference step size
    # Loss at the current parameter values; also the baseline for every
    # forward-difference quotient below.
    original_loss = feed_forward(inputs, outputs, weights)
    updated_weights = deepcopy(weights)
    # Perturb one scalar parameter at a time, measure how the loss changes,
    # and take a gradient step on that parameter.
    for i, layer in enumerate(weights):
        for index, _ in np.ndenumerate(layer):
            temp_weights = deepcopy(weights)
            temp_weights[i][index] += eps
            loss_plus = feed_forward(inputs, outputs, temp_weights)
            grad = (loss_plus - original_loss) / eps
            updated_weights[i][index] -= grad * lr
    return updated_weights, original_loss
# Initial parameters, all float32:
#   W[0] hidden-layer weights, shape (2, 3)
#   W[1] hidden-layer bias,    shape (3,)
#   W[2] output-layer weights, shape (3, 1)
#   W[3] output-layer bias,    shape (1,)
_hidden_w = np.array([[-0.0053, -0.5820, -0.2723],
                      [0.3793, -0.5204, 0.1896]], dtype=np.float32)
_hidden_b = np.array([-0.0140, 0.5607, -0.0628], dtype=np.float32)
_output_w = np.array([[0.1528],
                      [-0.1745],
                      [-0.1135]], dtype=np.float32)
_output_b = np.array([-0.5516], dtype=np.float32)
W = [_hidden_w, _hidden_b, _output_w, _output_b]
# Train: update all parameters 100 times (the epoch count is a
# hyperparameter), recording the loss measured before each update.
# (Removed a stretch of commented-out scratch code that duplicated the
# forward pass.)
losses = []
for epoch in range(100):
    W, loss = update_weights(x, y, W, 0.01)
    losses.append(loss)

plt.plot(losses)
plt.title('Loss over increasing number of epochs')

# Final forward pass with the trained weights, to inspect the prediction.
pre_hidden = np.dot(x, W[0]) + W[1]
hidden = 1 / (1 + np.exp(-pre_hidden))
out = np.dot(hidden, W[2]) + W[3]
print(out)