Hand-rolling a simple multilayer perceptron in Python

A bit of casual hacking. I picked up the basics of deep learning a long time ago — forward propagation, backpropagation and so on — but I had never implemented them myself. Standing on the shoulders of giants lets you see further, but understanding the low-level machinery lets you stand more firmly.

Backpropagation applies the chain rule along the network's computation graph; to evaluate that graph in a program, the nodes have to be visited in topological order.
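
As a concrete illustration (my own notation, not taken from the course), take one linear layer followed by a sigmoid and a mean-squared-error loss. The backward pass implemented below simply evaluates these chain-rule factors node by node, from the loss back towards the parameters:

$$
L = \frac{1}{m}\sum_{i}(y_i - a_i)^2, \qquad a = \sigma(z), \qquad z = XW + b
$$

$$
\frac{\partial L}{\partial W}
= X^{\top}\!\left[\frac{\partial L}{\partial a}\odot\frac{\partial a}{\partial z}\right]
= X^{\top}\!\left[-\tfrac{2}{m}(y - a)\odot\sigma(z)\bigl(1-\sigma(z)\bigr)\right]
$$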

Today I'll use Python to implement a small network built from linear (y = Wx + b) and sigmoid nodes, trained with mean squared error. It also serves as a wrap-up of the fall-semester course 《神经网络原理》 (Principles of Neural Networks).

import numpy as np
import random

class Node:
    def __init__(self, inputs=None):
        # Inbound nodes this node depends on; the outbound list is filled in
        # by the nodes that consume this one.
        self.inputs = inputs if inputs is not None else []
        self.outputs = []

        for n in self.inputs:
            n.outputs.append(self)

        self.value = None

        # Partial derivatives of the loss w.r.t. this node's inbound nodes.
        self.gradients = {}

    def forward(self):
        """
        Forward pass.
        Computes a value from the inbound nodes and stores it in self.value.
        """
        raise NotImplementedError

    def backward(self):
        """
        Backward pass.
        Computes the gradients w.r.t. the inbound nodes and stores them
        in self.gradients.
        """
        raise NotImplementedError

class Placeholder(Node):
    def __init__(self):
        """
        An input node has no inbound nodes, so nothing is passed
        to the Node constructor.
        """
        Node.__init__(self)

    def forward(self, value=None):
        """
        All other node types read the values of their inbound nodes in
        forward(), e.g. self.inputs[0].value; a Placeholder just takes
        its value directly (here or from the feed_dict).
        """
        if value is not None:
            self.value = value

    def backward(self):
        # Accumulate the gradient flowing back from every consumer.
        self.gradients = {self: 0}
        for n in self.outputs:
            grad_cost = n.gradients[self]
            self.gradients[self] += grad_cost * 1

# class Add(Node):
#     def __init__(self, *nodes):
#         Node.__init__(self, list(nodes))
#
#     def forward(self):
#         # When forward() runs, the value is computed as defined here.
#         self.value = sum(map(lambda n: n.value, self.inputs))
    
class Linear(Node):
    def __init__(self, nodes, weights, bias):
        Node.__init__(self, [nodes, weights, bias])

    def forward(self):
        inputs = self.inputs[0].value
        weights = self.inputs[1].value
        bias = self.inputs[2].value

        self.value = np.dot(inputs, weights) + bias

    def backward(self):
        # Initialise a zero gradient for every inbound node, then accumulate
        # the contribution from each consumer of this node.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}

        for n in self.outputs:
            grad_cost = n.gradients[self]

            # dL/dX = dL/dZ . W^T,  dL/dW = X^T . dL/dZ,  dL/db = sum over the batch
            self.gradients[self.inputs[0]] += np.dot(grad_cost, self.inputs[1].value.T)
            self.gradients[self.inputs[1]] += np.dot(self.inputs[0].value.T, grad_cost)
            self.gradients[self.inputs[2]] += np.sum(grad_cost, axis=0, keepdims=False)
        

class Sigmoid(Node):
    def __init__(self, node):
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-1 * x))

    def forward(self):
        self.x = self.inputs[0].value
        self.value = self._sigmoid(self.x)

    def backward(self):
        # y = 1 / (1 + e^-x)
        # y' = y * (1 - y)
        self.partial = self.value * (1 - self.value)

        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}

        for n in self.outputs:
            grad_cost = n.gradients[self]

            self.gradients[self.inputs[0]] += grad_cost * self.partial

class MSE(Node):
    def __init__(self, y, a):
        Node.__init__(self, [y, a])
    
    def forward(self):
        y = self.inputs[0].value.reshape(-1, 1)
        a = self.inputs[1].value.reshape(-1, 1)
        assert y.shape == a.shape, 'y and a must have the same shape'

        self.m = self.inputs[0].value.shape[0]
        self.diff = y - a
        self.value = np.mean(self.diff**2)

    def backward(self):
        # L = mean((y - a)^2)  =>  dL/dy = (2/m)(y - a),  dL/da = -(2/m)(y - a)
        self.gradients[self.inputs[0]] = (2 / self.m) * self.diff
        self.gradients[self.inputs[1]] = (-2 / self.m) * self.diff

def forward_backward(graph):
    # Forward pass over the topologically sorted nodes
    for n in graph:
        n.forward()
    # Backward pass in reverse order
    for n in graph[::-1]:
        n.backward()

def toplogic(graph):
    """
    Topological sort: repeatedly peel off a node that no other node
    points to (in-degree zero within `graph`) and append it to the order.
    """
    sorted_node = []
    
    while len(graph) > 0:
        all_inputs = []
        all_outputs = []
        
        for n in graph:
            all_inputs += graph[n]
            all_outputs.append(n)
        
        all_inputs = set(all_inputs)
        all_outputs = set(all_outputs)
        
        need_remove = all_outputs - all_inputs
        
        if len(need_remove) > 0:
            node = random.choice(list(need_remove))
            
            need_to_visited = [node]
            
            # The very last key also drags its outbound nodes into the order
            # (e.g. the loss node, which never appears as a key in `graph`).
            if len(graph) == 1:
                need_to_visited += graph[node]
            
            graph.pop(node)
            sorted_node += need_to_visited
            
            for _, links in graph.items():
                if node in links:
                    links.remove(node)
        else:
            break
    return sorted_node

from collections import defaultdict

def convert_feed_dict_to_graph(feed_dict):
    computing_graph = defaultdict(list)
    nodes = [n for n in feed_dict]
    
    while nodes:
        n = nodes.pop(0)
        
        if isinstance(n, Placeholder):
            n.value = feed_dict[n]
        
        if n in computing_graph:
            continue
        
        for m in n.outputs:
            computing_graph[n].append(m)
            nodes.append(m)
        
    return computing_graph

def topological_sort_feed_dict(feed_dict):
    graph = convert_feed_dict_to_graph(feed_dict)
    return toplogic(graph)
        

def optimize(trainables, learning_rate=1e-2):
    # Plain stochastic gradient descent; plenty of fancier optimisers exist,
    # this is just the simplest possible update rule.
    for t in trainables:
        t.value += -1 * learning_rate * t.gradients[t]
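
Before moving on to real data, here is a small sanity check I find useful (my own addition, not part of the original post): build a tiny graph with the node classes above, run forward_backward, and compare one analytic gradient against a central finite difference. If the two numbers agree to several decimal places, the backward passes are wired correctly.

# --- Quick sanity check (my own addition, not part of the original post) ---
# Build a tiny graph with the classes above and compare the analytic gradient
# dL/dW[0, 0] with a central finite-difference estimate.
def _toy_gradient_check():
    X, y = Placeholder(), Placeholder()
    W, b = Placeholder(), Placeholder()
    out = Sigmoid(Linear(X, W, b))
    loss = MSE(y, out)

    feed = {
        X: np.array([[1.0, 2.0]]),
        y: np.array([0.5]),
        W: np.array([[0.1], [-0.2]]),
        b: np.array([0.05]),
    }

    graph = topological_sort_feed_dict(feed)
    forward_backward(graph)
    analytic = W.gradients[W][0, 0]

    # Central finite difference on W[0, 0]
    eps = 1e-6
    W.value[0, 0] += eps
    for n in graph:
        n.forward()
    loss_plus = loss.value
    W.value[0, 0] -= 2 * eps
    for n in graph:
        n.forward()
    loss_minus = loss.value
    W.value[0, 0] += eps  # restore the original weight

    numeric = (loss_plus - loss_minus) / (2 * eps)
    print('analytic: {:.6f}, numeric: {:.6f}'.format(analytic, numeric))

_toy_gradient_check()
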
import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample

# Load the data (note: load_boston was removed in scikit-learn 1.2,
# so this requires an older scikit-learn version)
data = load_boston()
X_ = data['data']
y_ = data['target']

# Standardise the features
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

n_features = X_.shape[1]
n_hidden = 10
W1_ = np.random.randn(n_features, n_hidden)
b1_ = np.zeros(n_hidden)
W2_ = np.random.randn(n_hidden, 1)
b2_ = np.zeros(1)

# Build the network: Linear -> Sigmoid -> Linear, trained with MSE
X, y = Placeholder(), Placeholder()
W1, b1 = Placeholder(), Placeholder()
W2, b2 = Placeholder(), Placeholder()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {
    X: X_,
    y: y_,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_
}

epochs = 2000
m = X_.shape[0]
batch_size = 16
steps_per_epoch = m // batch_size

graph = topological_sort_feed_dict(feed_dict)
trainables = [W1, b1, W2, b2]
print("Total number of examples = {}".format(m))
Total number of examples = 506
losses = []
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1: sample a mini-batch
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        X.value = X_batch
        y.value = y_batch

        # Step 2: forward and backward pass
        forward_backward(graph)

        # Step 3: gradient-descent update
        rate = 1e-2
        optimize(trainables, rate)

        loss += graph[-1].value
    losses.append(loss/steps_per_epoch)
    if i % 100 == 0:
        print('epoch: {}, loss: {:.3f}'.format(i+1, loss/steps_per_epoch))

Training output:

epoch: 1, loss: 156.041
epoch: 101, loss: 7.104
epoch: 201, loss: 5.069
epoch: 301, loss: 4.563
epoch: 401, loss: 4.201
epoch: 501, loss: 3.764
epoch: 601, loss: 3.474
epoch: 701, loss: 3.783
epoch: 801, loss: 3.587
epoch: 901, loss: 3.141
epoch: 1001, loss: 3.449
epoch: 1101, loss: 4.105
epoch: 1201, loss: 3.464
epoch: 1301, loss: 2.986
epoch: 1401, loss: 3.237
epoch: 1501, loss: 2.817
epoch: 1601, loss: 3.482
epoch: 1701, loss: 3.022
epoch: 1801, loss: 3.317
epoch: 1901, loss: 3.016

import matplotlib.pyplot as plt
plt.plot(losses)

[Figure: training loss per epoch]
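
My own addition, not in the original post: as a final check, run one forward pass over the whole standardised data set and compare a few of the network's outputs with the true house prices.

# Final forward pass over the full data set (no parameter updates).
X.value, y.value = X_, y_
for n in graph:
    n.forward()
# First column: predicted price, second column: true price
print(np.c_[l2.value[:5].ravel(), y_[:5]])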
