MiniFlow -- 12. SGD Solution

Here we use the Boston housing dataset to build a complete machine-learning example.
We add a gradient-descent (SGD) update function.
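
The update applied to each trainable parameter (a weight or bias) is the standard SGD rule

$$w \leftarrow w - \eta \,\frac{\partial \text{cost}}{\partial w}$$

where $\eta$ is the learning rate. The sgd_update function below reads the partial derivative from each trainable's gradients dictionary and performs exactly this step.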

def sgd_update(trainables, learning_rate=1e-2):
    """
        Updates the value of each trainable with SGD.

        Arguments:

            `trainables`: A list of `Input` Nodes representing weights/biases.
            `learning_rate`: The learning rate.
        """
    # TODO: update all the `trainables` with SGD
    # You can access and assign the value of a trainable with `value` attribute.
    # Example:
    # for t in trainables:
    #   t.value = your implementation here
    for t in trainables:
        # Change the trainable's value by subtracting the learning rate
        # multiplied by the partial of the cost with respect to this
        # trainable.
        partial = t.gradients[t]
        t.value -= learning_rate * partial

Full code
nn.py

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
from miniflow import *

if __name__ == "__main__":
    data = load_boston()
    X_ = data["data"]
    y_ = data["target"]
    # Normalize data
    X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

    n_features = X_.shape[1]
    n_hidden = 10
    W1_ = np.random.randn(n_features, n_hidden)
    b1_ = np.zeros(n_hidden)
    W2_ = np.random.randn(n_hidden, 1)
    b2_ = np.zeros(1)

    # Neural network
    X, y = Input(), Input()
    W1, b1 = Input(), Input()
    W2, b2 = Input(), Input()

    l1 = Linear(X, W1, b1)
    s1 = Sigmoid(l1)
    l2 = Linear(s1, W2, b2)
    cost = MSE(y, l2)

    feed_dict = {
        X: X_,
        y: y_,
        W1: W1_,
        b1: b1_,
        W2: W2_,
        b2: b2_
    }
    epochs = 10
    # Total number of examples
    m = X_.shape[0]
    batch_size = 11
    steps_per_epoch = m // batch_size

    graph = topological_sort(feed_dict)
    trainables = [W1, b1, W2, b2]

    print("Total number of examples = {}".format(m))
    # Step 4
    for i in range(epochs):
        loss = 0
        for j in range(steps_per_epoch):
            # Step 1
            # Randomly sample a batch of examples
            X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

            # Reset value of X and y Inputs
            X.value = X_batch
            y.value = y_batch

            # Step 2
            forward_and_backward(graph)

            # Step 3
            sgd_update(trainables)

            loss += graph[-1].value

        print("Epoch: {}, Loss: {:.3f}".format(i + 1, loss / steps_per_epoch))

miniflow.py

import numpy as np


class Layer(object):
    def __init__(self, inbound_layers=[]):
        # List of inbound layers, used in the forward pass
        self.inbound_layers = inbound_layers
        # List of outbound layers, used in backpropagation
        self.outbound_layers = []
        # The value computed by this node
        self.value = 0
        # A dictionary:
        # key: an inbound layer of this layer
        # value: the partial derivative of the cost with respect to that inbound layer
        self.gradients = {}
        # Add this node as an outbound node on each of its inputs.
        for layers in self.inbound_layers:
            layers.outbound_layers.append(self)

    def forward(self):
        """
        Forward propagation (virtual method).
        Computes this node's output from the values of the layers in
        inbound_layers and stores the result in self.value.
        """
        raise NotImplementedError

    def backward(self):
        raise NotImplementedError


class Input(Layer):
    def __init__(self):
        """
        Input node 没有 inbound nodes ,他是整个神经网络的开始
        因此不需要进行任何操作
        """
        # 输入节点列表为空
        Layer.__init__(self)

    def forward(self):
        pass

    def backward(self):
        # An Input node has no inputs so the gradient (derivative)
        # is zero.
        # The key, `self`, is reference to this object.
        self.gradients = {self: 0}
        # Weights and bias may be inputs, so you need to sum the gradient from output gradients.
        for layer in self.outbound_layers:
            grad_cost = layer.gradients[self]
            self.gradients[self] += grad_cost * 1


class Linear(Layer):
    def __init__(self, X, W, b):
        # Notice the ordering of the inputs passed to the
        # Node constructor.
        Layer.__init__(self, [X, W, b])

    def forward(self):
        # X: shape (2, 3) in the running example -- 2 examples, 3 features
        X = self.inbound_layers[0].value
        # W: shape (3, 1)
        W = self.inbound_layers[1].value
        # b: shape (1,)
        b = self.inbound_layers[2].value
        # output: shape (2, 1)
        self.value = np.dot(X, W) + b

    def backward(self):
        """
        Calculates the gradient based on the output values.
        """
        # Initialize a partial for each of the inbound_layers.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_layers}
        # Cycle through the outputs. The gradient will change depending
        # on each output, so the gradients are summed over all outputs.
        for n in self.outbound_layers:
            # Get the partial of the cost with respect to this node.
            # grad_cost: shape (2, 1), where 2 is the batch size.
            grad_cost = n.gradients[self]
            # Partial of the loss with respect to this node's inputs:
            # dC/dX = grad_cost . W^T, one row per example in the batch.
            self.gradients[self.inbound_layers[0]] += np.dot(grad_cost, self.inbound_layers[1].value.T)
            # Partial of the loss with respect to this node's weights:
            # X is (2, 3), grad_cost from the layer above is (2, 1), W is (3, 1).
            # Transposing X and multiplying sums each example's contribution,
            # giving dC/dW = X^T . grad_cost.
            self.gradients[self.inbound_layers[1]] += np.dot(self.inbound_layers[0].value.T, grad_cost)
            # Partial of the loss with respect to this node's bias:
            # sum grad_cost over the batch dimension.
            self.gradients[self.inbound_layers[2]] += np.sum(grad_cost, axis=0, keepdims=False)


class Sigmoid(Layer):
    def __init__(self, layer):
        Layer.__init__(self, [layer])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-x))

    def forward(self):
        # input: shape (2, 1)
        input_value = self.inbound_layers[0].value
        # output: shape (2, 1)
        self.value = self._sigmoid(input_value)

    def backward(self):
        """
        Calculates the gradient using the derivative of
        the sigmoid function.
        """
        # Initialize the gradients to 0.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_layers}

        # Cycle through the outputs. The gradient will change depending
        # on each output, so the gradients are summed over all outputs.
        for n in self.outbound_layers:
            # Get the partial of the cost with respect to this node's output,
            # shape (2, 1).
            grad_cost = n.gradients[self]
            sigmoid = self.value
            # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); result has shape (2, 1).
            self.gradients[self.inbound_layers[0]] += sigmoid * (1 - sigmoid) * grad_cost


class MSE(Layer):
    def __init__(self, y, a):
        Layer.__init__(self, [y, a])

    def forward(self):
        # y: shape (2, 1)
        y = self.inbound_layers[0].value.reshape(-1, 1)
        # a: shape (2, 1)
        a = self.inbound_layers[1].value.reshape(-1, 1)
        self.m = self.inbound_layers[0].value.shape[0]
        # Save the computed difference for the backward pass; y and a have the same shape.
        # diff: shape (2, 1)
        self.diff = y - a
        # value: scalar
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        """
        Calculates the gradient of the cost.
        """
        # Derivative of the squared error with respect to each input:
        # dC/dy = (2/m) * (y - a), shape (2, 1)
        self.gradients[self.inbound_layers[0]] = (2 / self.m) * self.diff
        # dC/da = -(2/m) * (y - a), shape (2, 1)
        self.gradients[self.inbound_layers[1]] = (-2 / self.m) * self.diff


"""
Can you augment the Add class so that it accepts
any number of nodes as input?

Hint: this may be useful:
https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists
"""


def topological_sort(feed_dict):
    """
    我们将所有节点构成一个graphs,一个计算图
    :param feed_dict: 这个一个输入节点的字典,key 是Input node value 是初始化的值
    :return: 返回一个序列化的节点列表
    """
    # Collect all the input nodes.
    input_layers = [n for n in feed_dict.keys()]
    G = {}
    # Working list of nodes, seeded with the input nodes.
    layers = [n for n in input_layers]
    # G maps every node (inputs and everything downstream) to a dict: "in" is the set of
    # that node's inbound nodes and "out" is the set of its outbound nodes.
    # For an Input node "in" is empty; for the final node "out" is empty;
    # an intermediate node has both, e.g.:
    #       |-in list         |-in list
    # node1 |           node2 |
    #       |-out list        |-out list

    while len(layers) > 0:
        # Pop a node from the front of the list.
        n = layers.pop(0)
        if n not in G:
            G[n] = {"in": set(), "out": set()}
        for m in n.outbound_layers:
            if m not in G:
                G[m] = {"in": set(), "out": set()}
            G[n]["out"].add(m)
            G[m]["in"].add(n)
            layers.append(m)

    L = []
    # Nodes whose inbound edges have all been consumed (starts as the set of input nodes).
    S = set(input_layers)
    # Build the sorted list L: inputs first, then nodes whose inputs are all in L.
    while len(S) > 0:
        n = S.pop()
        # If n is an Input node, assign it its value from feed_dict.
        if isinstance(n, Input):
            n.value = feed_dict[n]
        # Append n to the sorted list.
        L.append(n)
        # Walk n's outbound nodes; n may be an Input, a hidden layer, etc.
        # The final node in the graph has no outbound_layers.
        for m in n.outbound_layers:
            # Remove the edge n -> m from n's side...
            G[n]["out"].remove(m)
            # ...and from m's side.
            G[m]["in"].remove(n)
            # Once all of m's inbound nodes have been appended to L, m itself is
            # ready: a node with several inputs is only added after all of them.
            if len(G[m]["in"]) == 0:
                S.add(m)
    return L


def forward_and_backward(graph):
    """
    Performs a forward pass and a backward pass through a list of sorted Nodes.

    Arguments:

        `graph`: The result of calling `topological_sort`.
    """
    # Forward pass
    for n in graph:
        n.forward()

    # Backward pass
    # see: https://docs.python.org/2.3/whatsnew/section-slices.html
    for n in graph[::-1]:
        n.backward()

def sgd_update(trainables, learning_rate=1e-2):
    """
        Updates the value of each trainable with SGD.

        Arguments:

            `trainables`: A list of `Input` Nodes representing weights/biases.
            `learning_rate`: The learning rate.
        """
    # TODO: update all the `trainables` with SGD
    # You can access and assign the value of a trainable with `value` attribute.
    # Example:
    # for t in trainables:
    #   t.value = your implementation here
    for t in trainables:
        # Change the trainable's value by subtracting the learning rate
        # multiplied by the partial of the cost with respect to this
        # trainable.
        partial = t.gradients[t]
        t.value -= learning_rate * partial