MiniFlow -- 12. SGD Solution

Here we use the Boston housing dataset to build a complete machine-learning example.
We add a gradient-descent (SGD) update function.
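
The update applied to each trainable parameter (a weight or bias) is the standard SGD rule

$$w \leftarrow w - \eta \,\frac{\partial \text{cost}}{\partial w}$$

where $\eta$ is the learning rate. The sgd_update function below reads the partial derivative from each trainable's gradients dictionary and performs exactly this step.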

def sgd_update(trainables, learning_rate=1e-2):
    """
        Updates the value of each trainable with SGD.

        Arguments:

            `trainables`: A list of `Input` Nodes representing weights/biases.
            `learning_rate`: The learning rate.
        """
    # TODO: update all the `trainables` with SGD
    # You can access and assign the value of a trainable with `value` attribute.
    # Example:
    # for t in trainables:
    #   t.value = your implementation here
    for t in trainables:
        # Change the trainable's value by subtracting the learning rate
        # multiplied by the partial of the cost with respect to this
        # trainable.
        partial = t.gradients[t]
        t.value -= learning_rate * partial

Full code
nn.py

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
from miniflow import *

if __name__ == "__main__":
    data = load_boston()
    X_ = data["data"]
    y_ = data["target"]
    # Normalize data
    X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

    n_features = X_.shape[1]
    n_hidden = 10
    W1_ = np.random.randn(n_features, n_hidden)
    b1_ = np.zeros(n_hidden)
    W2_ = np.random.randn(n_hidden, 1)
    b2_ = np.zeros(1)

    # Neural network
    X, y = Input(), Input()
    W1, b1 = Input(), Input()
    W2, b2 = Input(), Input()

    l1 = Linear(X, W1, b1)
    s1 = Sigmoid(l1)
    l2 = Linear(s1, W2, b2)
    cost = MSE(y, l2)

    feed_dict = {
        X: X_,
        y: y_,
        W1: W1_,
        b1: b1_,
        W2: W2_,
        b2: b2_
    }
    epochs = 10
    # Total number of examples
    m = X_.shape[0]
    batch_size = 11
    steps_per_epoch = m // batch_size

    graph = topological_sort(feed_dict)
    trainables = [W1, b1, W2, b2]

    print("Total number of examples = {}".format(m))
    # Step 4
    for i in range(epochs):
        loss = 0
        for j in range(steps_per_epoch):
            # Step 1
            # Randomly sample a batch of examples
            X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

            # Reset value of X and y Inputs
            X.value = X_batch
            y.value = y_batch

            # Step 2
            forward_and_backward(graph)

            # Step 3
            sgd_update(trainables)

            loss += graph[-1].value

        print("Epoch: {}, Loss: {:.3f}".format(i + 1, loss / steps_per_epoch))

miniflow.py

import numpy as np


class Layer(object):
    def __init__(self, inbound_layers=[]):
        # List of inbound layers, used in the forward pass
        self.inbound_layers = inbound_layers
        # List of outbound layers, used in backpropagation
        self.outbound_layers = []
        # The value computed by this node
        self.value = 0
        # A dictionary:
        # key: an inbound layer of this layer
        # value: the partial derivative of the cost with respect to that inbound layer
        self.gradients = {}
        # Add this node as an outbound node on each of its inputs.
        for layers in self.inbound_layers:
            layers.outbound_layers.append(self)

    def forward(self):
        """
        Forward propagation (virtual method).
        Computes this node's output from the values of the layers in
        inbound_layers and stores the result in self.value.
        """
        raise NotImplementedError

    def backward(self):
        raise NotImplementedError


class Input(Layer):
    def __init__(self):
        """
        Input node 没有 inbound nodes ,他是整个神经网络的开始
        因此不需要进行任何操作
        """
        # 输入节点列表为空
        Layer.__init__(self)

    def forward(self):
        pass

    def backward(self):
        # An Input node has no inputs so the gradient (derivative)
        # is zero.
        # The key, `self`, is reference to this object.
        self.gradients = {self: 0}
        # Weights and bias may be inputs, so you need to sum the gradient from output gradients.
        for layer in self.outbound_layers:
            grad_cost = layer.gradients[self]
            self.gradients[self] += grad_cost * 1


class Linear(Layer):
    def __init__(self, X, W, b):
        # Notice the ordering of the inputs passed to the
        # Node constructor.
        Layer.__init__(self, [X, W, b])

    def forward(self):
        # X: shape (2, 3) in the running example -- 2 examples, 3 features
        X = self.inbound_layers[0].value
        # W: shape (3, 1)
        W = self.inbound_layers[1].value
        # b: shape (1,)
        b = self.inbound_layers[2].value
        # output: shape (2, 1)
        self.value = np.dot(X, W) + b

    def backward(self):
        """
        Calculates the gradient based on the output values.
        """
        # Initialize a partial for each of the inbound_layers.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_layers}
        # Cycle through the outputs. The gradient will change depending
        # on each output, so the gradients are summed over all outputs.
        for n in self.outbound_layers:
            # Get the partial of the cost with respect to this node.
            # grad_cost: shape (2, 1), where 2 is the batch size.
            grad_cost = n.gradients[self]
            # Partial of the loss with respect to this node's inputs:
            # dC/dX = grad_cost . W^T, one row per example in the batch.
            self.gradients[self.inbound_layers[0]] += np.dot(grad_cost, self.inbound_layers[1].value.T)
            # Partial of the loss with respect to this node's weights:
            # X is (2, 3), grad_cost from the layer above is (2, 1), W is (3, 1).
            # Transposing X and multiplying sums each example's contribution,
            # giving dC/dW = X^T . grad_cost.
            self.gradients[self.inbound_layers[1]] += np.dot(self.inbound_layers[0].value.T, grad_cost)
            # Partial of the loss with respect to this node's bias:
            # sum grad_cost over the batch dimension.
            self.gradients[self.inbound_layers[2]] += np.sum(grad_cost, axis=0, keepdims=False)


class Sigmoid(Layer):
    def __init__(self, layer):
        Layer.__init__(self, [layer])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-x))

    def forward(self):
        # input: shape (2, 1)
        input_value = self.inbound_layers[0].value
        # output: shape (2, 1)
        self.value = self._sigmoid(input_value)

    def backward(self):
        """
        Calculates the gradient using the derivative of
        the sigmoid function.
        """
        # Initialize the gradients to 0.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_layers}

        # Cycle through the outputs. The gradient will change depending
        # on each output, so the gradients are summed over all outputs.
        for n in self.outbound_layers:
            # Get the partial of the cost with respect to this node's output,
            # shape (2, 1).
            grad_cost = n.gradients[self]
            sigmoid = self.value
            # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); result has shape (2, 1).
            self.gradients[self.inbound_layers[0]] += sigmoid * (1 - sigmoid) * grad_cost


class MSE(Layer):
    def __init__(self, y, a):
        Layer.__init__(self, [y, a])

    def forward(self):
        # y: shape (2, 1)
        y = self.inbound_layers[0].value.reshape(-1, 1)
        # a: shape (2, 1)
        a = self.inbound_layers[1].value.reshape(-1, 1)
        self.m = self.inbound_layers[0].value.shape[0]
        # Save the computed difference for the backward pass; y and a have the same shape.
        # diff: shape (2, 1)
        self.diff = y - a
        # value: scalar
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        """
        Calculates the gradient of the cost.
        """
        # Derivative of the squared error with respect to each input:
        # dC/dy = (2/m) * (y - a), shape (2, 1)
        self.gradients[self.inbound_layers[0]] = (2 / self.m) * self.diff
        # dC/da = -(2/m) * (y - a), shape (2, 1)
        self.gradients[self.inbound_layers[1]] = (-2 / self.m) * self.diff


"""
Can you augment the Add class so that it accepts
any number of nodes as input?

Hint: this may be useful:
https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists
"""


def topological_sort(feed_dict):
    """
    我们将所有节点构成一个graphs,一个计算图
    :param feed_dict: 这个一个输入节点的字典,key 是Input node value 是初始化的值
    :return: 返回一个序列化的节点列表
    """
    # Collect all the input nodes.
    input_layers = [n for n in feed_dict.keys()]
    G = {}
    # Working list of nodes, seeded with the input nodes.
    layers = [n for n in input_layers]
    # G maps every node (inputs and everything downstream) to a dict: "in" is the set of
    # that node's inbound nodes and "out" is the set of its outbound nodes.
    # For an Input node "in" is empty; for the final node "out" is empty;
    # an intermediate node has both, e.g.:
    #       |-in list         |-in list
    # node1 |           node2 |
    #       |-out list        |-out list

    while len(layers) > 0:
        # Pop a node from the front of the list.
        n = layers.pop(0)
        if n not in G:
            G[n] = {"in": set(), "out": set()}
        for m in n.outbound_layers:
            if m not in G:
                G[m] = {"in": set(), "out": set()}
            G[n]["out"].add(m)
            G[m]["in"].add(n)
            layers.append(m)

    L = []
    # Nodes whose inbound edges have all been consumed (starts as the set of input nodes).
    S = set(input_layers)
    # Build the sorted list L: inputs first, then nodes whose inputs are all in L.
    while len(S) > 0:
        n = S.pop()
        # If n is an Input node, assign it its value from feed_dict.
        if isinstance(n, Input):
            n.value = feed_dict[n]
        # Append n to the sorted list.
        L.append(n)
        # Walk n's outbound nodes; n may be an Input, a hidden layer, etc.
        # The final node in the graph has no outbound_layers.
        for m in n.outbound_layers:
            # Remove the edge n -> m from n's side...
            G[n]["out"].remove(m)
            # ...and from m's side.
            G[m]["in"].remove(n)
            # Once all of m's inbound nodes have been appended to L, m itself is
            # ready: a node with several inputs is only added after all of them.
            if len(G[m]["in"]) == 0:
                S.add(m)
    return L


def forward_and_backward(graph):
    """
    Performs a forward pass and a backward pass through a list of sorted Nodes.

    Arguments:

        `graph`: The result of calling `topological_sort`.
    """
    # Forward pass
    for n in graph:
        n.forward()

    # Backward pass
    # see: https://docs.python.org/2.3/whatsnew/section-slices.html
    for n in graph[::-1]:
        n.backward()

def sgd_update(trainables, learning_rate=1e-2):
    """
        Updates the value of each trainable with SGD.

        Arguments:

            `trainables`: A list of `Input` Nodes representing weights/biases.
            `learning_rate`: The learning rate.
        """
    # TODO: update all the `trainables` with SGD
    # You can access and assign the value of a trainable with `value` attribute.
    # Example:
    # for t in trainables:
    #   t.value = your implementation here
    for t in trainables:
        # Change the trainable's value by subtracting the learning rate
        # multiplied by the partial of the cost with respect to this
        # trainable.
        partial = t.gradients[t]
        t.value -= learning_rate * partial