FF (a.k.a. FFNN): Feedforward Neural Networks and Perceptrons


I. Background

1. Concept

In feedforward neural networks and perceptrons, information flows from front (input) to back (output). They are usually trained with backpropagation (BP), which makes this a form of supervised learning.

2. Diagram

[Figure: diagram of a feedforward neural network (FFNN)]

II. Model demo

1. Installing the library

First check which Python the notebook is running, then install muffnn:

import sys
print(sys.version)
print(sys.executable)
! pip install muffnn==1.2.0

Next, run the setup.py from the GitHub repository to set up the environment:

! python setup.py build develop

Setup succeeded! (It turned out the package was already in this folder.)
Now let's look at the usage example from its documentation:

# Import the package
from muffnn import MLPClassifier
# Load the data
X, y = load_some_data()
# Build the model
mlp = MLPClassifier()
# Fit the model to learn the parameters
mlp.fit(X, y)
# Load new, unlabeled data and classify it
X_new = load_some_unlabeled_data()
y_pred = mlp.predict(X_new)
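
Since load_some_data() above is only a placeholder, here is a runnable sketch on scikit-learn's digits dataset (the dataset choice and the hidden_units/n_epochs settings are my own, not from the muffnn docs):

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from muffnn import MLPClassifier

# A small real dataset standing in for load_some_data().
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

mlp = MLPClassifier(hidden_units=(64,), n_epochs=10)
mlp.fit(X_train, y_train)
print(mlp.score(X_test, y_test))  # mean accuracy on the held-out split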

muffnn estimators are picklable, so a trained model can be saved to disk with the pickle module:

import pickle
with open('est.pkl', 'wb') as fp:
    pickle.dump(est, fp)  # est is the fitted estimator, e.g. the mlp above
  • pickle.dumps() serializes an object and returns a bytes object
  • pickle.loads() reconstructs an object from such a bytes object
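
Loading it back is the mirror image:

import pickle
# Reload the fitted estimator from disk.
with open('est.pkl', 'rb') as fp:
    est = pickle.load(fp)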

2. Reading the layered package source

The setup so far was mostly preparation; the real subject starts here: reading the source of the layered package, file by file.

example.py
import numpy as np


class Example:
    """
    Immutable class representing one example in a dataset.
    """

    # __slots__ restricts instances to exactly these two attributes.
    __slots__ = ('_data', '_target')

    # With __init__ defined, instantiation must pass both arguments.
    def __init__(self, data, target):
        self._data = np.array(data, dtype=float)
        self._target = np.array(target, dtype=float)

    @property
    def data(self):
        return self._data

    @property
    def target(self):
        return self._target

    # __getstate__/__setstate__ make the class picklable despite __slots__.
    def __getstate__(self):
        return {'data': self.data, 'target': self.target}

    def __setstate__(self, state):
        self._data = state['data']
        self._target = state['target']

    def __repr__(self):
        # round(x, 2) rounds x to two decimal places.
        data = ' '.join(str(round(x, 2)) for x in self.data)
        target = ' '.join(str(round(x, 2)) for x in self.target)
        return '({})->({})'.format(data, target)
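
A quick sanity check of the class (the values are arbitrary):

ex = Example([0.5, 0.25], [1.0])
print(ex)       # (0.5 0.25)->(1.0)
print(ex.data)  # [0.5  0.25]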

That's a quick pass over the first .py file; we'll come back to it when it gets used!

utility.py
import os
import errno
import functools
import itertools


def repeated(iterable, times):
    # The underscore is just a throwaway loop variable.
    for _ in range(times):
        # `yield from iterable` is shorthand for
        # `for item in iterable: yield item`.
        yield from iterable


def batched(iterable, size):
    """
    Split an iterable into batches (lists) of a fixed size. The last batch
    may be smaller.
    """
    batch = []
    for element in iterable:
        batch.append(element)
        if len(batch) == size:
            yield batch
            batch = []
    if batch:
        yield batch
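
A toy run showing how repeated() and batched() compose:

list(batched(repeated([1, 2, 3], times=2), size=2))
# -> [[1, 2], [3, 1], [2, 3]]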


def averaged(callable_, batch):
    """
    Apply callable_ to every element of the batch and return the average
    of the results.
    """
    overall = None
    for element in batch:
        current = callable_(element)
        # Compare against None explicitly: the results may be numpy arrays,
        # whose truth value is ambiguous (and a result of zero is valid).
        overall = overall + current if overall is not None else current
    return overall / len(batch)
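
For example:

averaged(lambda x: 2 * x, [1, 2, 3])  # (2 + 4 + 6) / 3 == 4.0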


def listify(fn=None, wrapper=list):
    """
    From http://stackoverflow.com/a/12377059/1079110
    Decorator that collects a generator function's output into `wrapper`
    (a list by default). Works both bare and with arguments.
    """
    def listify_return(fn):
        # functools.wraps(fn) copies fn's metadata onto listify_helper;
        # it is equivalent to listify_helper = functools.wraps(fn)(listify_helper).
        @functools.wraps(fn)
        def listify_helper(*args, **kw):
            return wrapper(fn(*args, **kw))
        return listify_helper

    if fn is None:
        return listify_return
    return listify_return(fn)
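
Usage: without the decorator, squares() would return a generator; with it, a list:

@listify
def squares(n):
    for i in range(n):
        yield i * i

squares(4)  # [0, 1, 4, 9]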


def ensure_folder(path):
    """
    Create the folder if it does not exist yet. An existing folder is fine;
    any other error is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            return
        raise


def hstack_lines(blocks, sep=' '):
    """
    Stack multi-line text blocks side by side into columns, right-aligning
    each cell to the width of its column, and return the combined string.
    """
    blocks = [x.split('\n') for x in blocks]
    height = max(len(block) for block in blocks)
    widths = [max(len(line) for line in block) for block in blocks]
    output = ''
    for y in range(height):
        for x, w in enumerate(widths):
            # enumerate yields both the column index x and its width w.
            cell = blocks[x][y] if y < len(blocks[x]) else ''
            # rjust pads the cell to width w, right-aligned with spaces.
            output += cell.rjust(w, ' ') + sep
        output += '\n'
    return output

def pairwise(iterable):
    """
    Yield overlapping pairs (s0, s1), (s1, s2), ... of the iterable.
    """
    a, b = itertools.tee(iterable)
    # itertools.tee splits one iterator into two independent iterators.
    next(b, None)
    # Advance b by one; the None default avoids StopIteration on empty input.
    return zip(a, b)
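
For example:

list(pairwise([1, 2, 3, 4]))  # [(1, 2), (2, 3), (3, 4)]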

That wraps up the utility module. Low-level code is dense, but these helpers carry the rest of the project.

dataset.py

This module downloads datasets, caches them, and normalizes them into lists of Example objects.

import array
import os
import shutil
import struct
import gzip
from urllib.request import urlopen
import numpy as np
from layered.example import Example
from layered.utility import ensure_folder


class Dataset:

    urls = []
    cache = True

    def __init__(self):
        cache = type(self).cache
        if cache and self._is_cached():
            print('Load cached dataset')
            self.load()
        else:
            filenames = [self.download(x) for x in type(self).urls]
            self.training, self.testing = self.parse(*filenames)
            if cache:
                self.dump()

    @classmethod
    def folder(cls):
        name = cls.__name__.lower()
        home = os.path.expanduser('~')
        folder = os.path.join(home, '.layered/dataset', name)
        ensure_folder(folder)
        return folder

    def parse(self):
        """
        Subclass responsibility. The filenames of downloaded files will be
        passed as individual parameters to this function. Therefore, it must
        accept as many parameters as there are class-level urls. Should return a
        tuple of training examples and testing examples.
        """
        raise NotImplementedError

    def dump(self):
        np.save(self._training_path(), self.training)
        np.save(self._testing_path(), self.testing)

    def load(self):
        self.training = np.load(self._training_path())
        self.testing = np.load(self._testing_path())

    def download(self, url):
        _, filename = os.path.split(url)
        filename = os.path.join(self.folder(), filename)
        print('Download', filename)
        with urlopen(url) as response, open(filename, 'wb') as file_:
            shutil.copyfileobj(response, file_)
        return filename

    @staticmethod
    def split(examples, ratio=0.8):
        """
        Utility function that can be used within the parse() implementation of
        subclasses to split a list of examples into two lists for training and
        testing.
        """
        split = int(ratio * len(examples))
        return examples[:split], examples[split:]

    def _is_cached(self):
        if not os.path.exists(self._training_path()):
            return False
        if not os.path.exists(self._testing_path()):
            return False
        return True

    def _training_path(self):
        return os.path.join(self.folder(), 'training.npy')

    def _testing_path(self):
        return os.path.join(self.folder(), 'testing.npy')


class Test(Dataset):

    cache = False

    def __init__(self, amount=10):
        self.amount = amount
        super().__init__()

    def parse(self):
        examples = [Example([1, 2, 3], [1, 2, 3]) for _ in range(self.amount)]
        return self.split(examples)


class Regression(Dataset):
    """
    Synthetically generated dataset for regression. The task is to predict the
    sum and product of all the input values. All values are normalized between
    zero and one.
    """

    cache = False

    def __init__(self, amount=10000, inputs=10):
        self.amount = amount
        self.inputs = inputs
        super().__init__()

    def parse(self):
        data = np.random.rand(self.amount, self.inputs)
        products = np.prod(data, axis=1)
        products = products / np.max(products)
        sums = np.sum(data, axis=1)
        sums = sums / np.max(sums)
        targets = np.column_stack([sums, products])
        examples = [Example(x, y) for x, y in zip(data, targets)]
        return self.split(examples)


class Modulo(Dataset):
    """
    Synthetically generated classification dataset. The task is to predict the
    modulo classes of random integers encoded as bit arrays of length 32.
    """

    cache = False

    def __init__(self, amount=60000, inputs=32, classes=7):
        self.amount = amount
        self.inputs = inputs
        self.classes = classes
        super().__init__()

    def parse(self):
        data = np.random.randint(0, self.inputs ** 2 - 1, self.amount)
        mods = np.mod(data, self.classes)
        targets = np.zeros((self.amount, self.classes))
        for index, mod in enumerate(mods):
            targets[index][mod] = 1
        data = (((data[:, None] & (1 << np.arange(self.inputs)))) > 0)
        examples = [Example(x, y) for x, y in zip(data, targets)]
        return self.split(examples)


class Mnist(Dataset):
    """
    The MNIST database of handwritten digits, available from this page, has a
    training set of 60,000 examples, and a test set of 10,000 examples. It is a
    subset of a larger set available from NIST. The digits have been
    size-normalized and centered in a fixed-size image. It is a good database
    for people who want to try learning techniques and pattern recognition
    methods on real-world data while spending minimal efforts on preprocessing
    and formatting. (from http://yann.lecun.com/exdb/mnist/)
    """

    urls = [
        'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
    ]

    def parse(self, train_x, train_y, test_x, test_y):
        # pylint: disable=arguments-differ
        training = list(self.read(train_x, train_y))
        testing = list(self.read(test_x, test_y))
        return training, testing

    @staticmethod
    def read(data, labels):
        images = gzip.open(data, 'rb')
        _, size, rows, cols = struct.unpack('>IIII', images.read(16))
        image_bin = array.array('B', images.read())
        images.close()

        labels = gzip.open(labels, 'rb')
        _, size2 = struct.unpack('>II', labels.read(8))
        assert size == size2
        label_bin = array.array('B', labels.read())
        labels.close()

        for i in range(size):
            data = image_bin[i * rows * cols:(i + 1) * rows * cols]
            data = np.array(data).reshape(rows * cols) / 255
            target = np.zeros(10)
            target[label_bin[i]] = 1
            yield Example(data, target)
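
The synthetic datasets can be tried right away (the sizes here are my own):

dataset = Modulo(amount=100, inputs=32, classes=7)
print(len(dataset.training), len(dataset.testing))  # 80 20 (the 0.8 split)
print(dataset.training[0])  # 32 bits -> one-hot modulo class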

cost.py

Cost functions for measuring prediction error.

import numpy as np


class Cost:

    def __call__(self, prediction, target):
        raise NotImplementedError

    def delta(self, prediction, target):
        raise NotImplementedError


class SquaredError(Cost):
    """
    Fast and simple cost function.
    """

    def __call__(self, prediction, target):
        return (prediction - target) ** 2 / 2

    def delta(self, prediction, target):
        return prediction - target


class CrossEntropy(Cost):
    """
    Logistic cost function used for classification tasks. Learns faster in the
    beginning than SquaredError because large errors are penalized
    exponentially. This makes sense in classification since only the best class
    will be the predicted one.
    """

    def __init__(self, epsilon=1e-11):
        self.epsilon = epsilon

    def __call__(self, prediction, target):
        clipped = np.clip(prediction, self.epsilon, 1 - self.epsilon)
        cost = target * np.log(clipped) + (1 - target) * np.log(1 - clipped)
        return -cost

    def delta(self, prediction, target):
        denominator = np.maximum(prediction - prediction ** 2, self.epsilon)
        delta = (prediction - target) / denominator
        assert delta.shape == target.shape == prediction.shape
        return delta
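
A quick numeric check of SquaredError (the values are arbitrary):

import numpy as np
cost = SquaredError()
prediction, target = np.array([0.8]), np.array([1.0])
print(cost(prediction, target))        # (0.8 - 1.0)**2 / 2 = [0.02]
print(cost.delta(prediction, target))  # [-0.2]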

activation.py

Defines the activation functions.

import numpy as np


class Activation:

    def __call__(self, incoming):
        raise NotImplementedError

    def delta(self, incoming, outgoing, above):
        """
        Compute the derivative of the cost with respect to the input of this
        activation function. Outgoing is what this function returned in the
        forward pass and above is the derivative of the cost with respect to
        the outgoing activation.
        """
        raise NotImplementedError


class Identity(Activation):

    def __call__(self, incoming):
        return incoming

    def delta(self, incoming, outgoing, above):
        delta = np.ones(incoming.shape).astype(float)
        return delta * above


class Sigmoid(Activation):

    def __call__(self, incoming):
        return 1 / (1 + np.exp(-incoming))

    def delta(self, incoming, outgoing, above):
        delta = outgoing * (1 - outgoing)
        return delta * above


class Relu(Activation):

    def __call__(self, incoming):
        return np.maximum(incoming, 0)

    def delta(self, incoming, outgoing, above):
        delta = np.greater(incoming, 0).astype(float)
        return delta * above


class Softmax(Activation):

    def __call__(self, incoming):
        # The constant doesn't change the expression but prevents overflows.
        constant = np.max(incoming)
        exps = np.exp(incoming - constant)
        return exps / exps.sum()

    def delta(self, incoming, outgoing, above):
        delta = outgoing * above
        sum_ = delta.sum(axis=delta.ndim - 1, keepdims=True)
        delta -= outgoing * sum_
        return delta


class SparseField(Activation):

    def __init__(self, inhibition=0.05, leaking=0.0):
        self.inhibition = inhibition
        self.leaking = leaking

    def __call__(self, incoming):
        count = len(incoming)
        length = int(np.sqrt(count))
        assert length ** 2 == count, 'layer size must be a square'
        field = incoming.copy().reshape((length, length))
        radius = int(np.sqrt(self.inhibition * count)) // 2
        assert radius, 'no inhibition due to small factor'
        outgoing = np.zeros(field.shape)
        while True:
            x, y = np.unravel_index(field.argmax(), field.shape)
            if field[x, y] <= 0:
                break
            outgoing[x, y] = 1
            surrounding = np.s_[
                max(x - radius, 0):min(x + radius + 1, length),
                max(y - radius, 0):min(y + radius + 1, length)]
            field[surrounding] = 0
            assert field[x, y] == 0
        outgoing = outgoing.reshape(count)
        outgoing = np.maximum(outgoing, self.leaking * incoming)
        return outgoing

    def delta(self, incoming, outgoing, above):
        delta = np.greater(outgoing, 0).astype(float)
        return delta * above


class SparseRange(Activation):
    """
    E%-Max Winner-Take-All.

    Binary activation. First, the activation function is applied. Then all
    neurons within the specified range below the strongest neuron are set to
    one. All others are set to zero. The gradient is the one of the activation
    function for active neurons and zero otherwise.

    See: A Second Function of Gamma Frequency Oscillations: An E%-Max
    Winner-Take-All Mechanism Selects Which Cells Fire. (2009)
    """

    def __init__(self, range_=0.3, function=Sigmoid()):
        assert 0 < range_ < 1
        self._range = range_
        self._function = function

    def __call__(self, incoming):
        incoming = self._function(incoming)
        threshold = self._threshold(incoming)
        active = (incoming >= threshold)
        outgoing = np.zeros(incoming.shape)
        outgoing[active] = 1
        # width = active.sum() * 80 / 1000
        # print('|', '#' * width, ' ' * (80 - width), '|')
        return outgoing

    def delta(self, incoming, outgoing, above):
        # return self._function.delta(incoming, outgoing, outgoing * above)
        return outgoing * self._function.delta(incoming, outgoing, above)

    def _threshold(self, incoming):
        min_, max_ = incoming.min(), incoming.max()
        threshold = min_ + (max_ - min_) * (1 - self._range)
        return threshold
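
Each delta() can be validated against a finite-difference estimate, the same idea NumericalGradient uses later (a sketch with arbitrary values):

import numpy as np
act, eps = Sigmoid(), 1e-6
x = np.array([0.3])
analytic = act.delta(x, act(x), np.ones(1))
numeric = (act(x + eps) - act(x - eps)) / (2 * eps)
print(np.allclose(analytic, numeric))  # True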

network.py

Concrete network structures: layers, the flattened weight matrices, and the forward pass (feed).

import operator
import numpy as np


class Layer:

    def __init__(self, size, activation):
        assert size and isinstance(size, int)
        self.size = size
        self.activation = activation()
        self.incoming = np.zeros(size)
        self.outgoing = np.zeros(size)
        assert len(self.incoming) == len(self.outgoing) == self.size

    def __len__(self):
        assert len(self.incoming) == len(self.outgoing)
        return len(self.incoming)

    def __repr__(self):
        return repr(self.outgoing)

    def __str__(self):
        table = zip(self.incoming, self.outgoing)
        rows = [' /'.join('{: >6.3f}'.format(x) for x in row) for row in table]
        return '\n'.join(rows)

    def apply(self, incoming):
        """
        Store the incoming activation, apply the activation function and store
        the result as outgoing activation.
        """
        assert len(incoming) == self.size
        self.incoming = incoming
        outgoing = self.activation(self.incoming)
        assert len(outgoing) == self.size
        self.outgoing = outgoing

    def delta(self, above):
        """
        The derivative of the activation function at the current state.
        """
        return self.activation.delta(self.incoming, self.outgoing, above)


class Matrices:

    def __init__(self, shapes, elements=None):
        self.shapes = shapes
        length = sum(x * y for x, y in shapes)
        if elements is not None:
            assert len(elements) == length
            elements = elements.copy()
        else:
            elements = np.zeros(length)
        self.flat = elements

    def __len__(self):
        return len(self.shapes)

    def __getitem__(self, index):
        if hasattr(index, '__len__'):
            assert isinstance(index[0], int)
            return self[index[0]][index[1:]]
        if isinstance(index, slice):
            return [self[i] for i in self._range_from_slice(index)]
        slice_ = self._locate(index)
        data = self.flat[slice_]
        data = data.reshape(self.shapes[index])
        return data

    def __setitem__(self, index, data):
        if hasattr(index, '__len__'):
            assert isinstance(index[0], int)
            self[index[0]][index[1:]] = data
            return
        if isinstance(index, slice):
            for i in self._range_from_slice(index):
                self[i] = data
            return
        slice_ = self._locate(index)
        data = data.reshape(slice_.stop - slice_.start)
        self.flat[slice_] = data

    def __getattr__(self, name):
        # Tunnel not found properties to the underlying array.
        flat = super().__getattribute__('flat')
        return getattr(flat, name)

    def __setattr_(self, name, value):
        # Ensure that the size of the underlying array doesn't change.
        if name == 'flat':
            assert value.shape == self.flat.shape
        super().__setattr__(name, value)

    def copy(self):
        return Matrices(self.shapes, self.flat.copy())

    def __add__(self, other):
        return self._operation(other, lambda x, y: x + y)

    def __sub__(self, other):
        return self._operation(other, lambda x, y: x - y)

    def __mul__(self, other):
        return self._operation(other, lambda x, y: x * y)

    def __truediv__(self, other):
        return self._operation(other, lambda x, y: x / y)

    __rmul__ = __mul__

    __radd__ = __add__

    def _operation(self, other, operation):
        try:
            other = other.flat
        except AttributeError:
            pass
        return Matrices(self.shapes, operation(self.flat, other))

    def _locate(self, index):
        assert isinstance(index, int), (
            'Only single elements can be indexed in the first dimension.')
        if index < 0:
            index = len(self.shapes) + index
        if not 0 <= index < len(self.shapes):
            raise IndexError
        offset = sum(x * y for x, y in self.shapes[:index])
        length = operator.mul(*self.shapes[index])
        return slice(offset, offset + length)

    def _range_from_slice(self, slice_):
        start = slice_.start if slice_.start else 0
        stop = slice_.stop if slice_.stop else len(self.shapes)
        step = slice_.step if slice_.step else 1
        return range(start, stop, step)

    def __str__(self):
        return str(len(self.flat)) + str(self.flat)


class Network:

    def __init__(self, layers):
        self.layers = layers
        self.sizes = tuple(layer.size for layer in self.layers)
        # Weight matrices have the dimensions of the two layers around them.
        # Also, there is an additional bias input to each weight matrix.
        self.shapes = zip(self.sizes[:-1], self.sizes[1:])
        self.shapes = [(x + 1, y) for x, y in self.shapes]
        # Weight matrices are in between the layers.
        assert len(self.shapes) == len(self.layers) - 1

    def feed(self, weights, data):
        """
        Evaluate the network with alternative weights on the input data and
        return the output activation.
        """
        assert len(data) == self.layers[0].size
        self.layers[0].apply(data)
        # Propagate through the remaining layers.
        connections = zip(self.layers[:-1], weights, self.layers[1:])
        for previous, weight, current in connections:
            incoming = self.forward(weight, previous.outgoing)
            current.apply(incoming)
        # Return the activations of the output layer.
        return self.layers[-1].outgoing

    @staticmethod
    def forward(weight, activations):
        # Add bias input of one.
        activations = np.insert(activations, 0, 1)
        assert activations[0] == 1
        right = activations.dot(weight)
        return right

    @staticmethod
    def backward(weight, activations):
        left = activations.dot(weight.transpose())
        # Don't expose the bias input of one.
        left = left[1:]
        return left
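
Shape bookkeeping in practice: a 2-3-1 network gets weight matrices of shape (3, 3) and (4, 1) because of the extra bias row (the activation choices here are arbitrary):

from layered.network import Network, Layer
from layered.activation import Identity, Sigmoid

net = Network([Layer(2, Identity), Layer(3, Sigmoid), Layer(1, Identity)])
print(net.shapes)  # [(3, 3), (4, 1)]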
problem.py

Parses a problem definition (dataset, cost, layers, hyperparameters) from YAML.

import os
import yaml
import layered.cost
import layered.dataset
import layered.activation
from layered.network import Layer


class Problem:

    def __init__(self, content=None):
        """
        Construct a problem. If content is specified, try to load it as a YAML
        path and otherwise treat it as an inline YAML string.
        """
        if content and os.path.isfile(content):
            with open(content) as file_:
                self.parse(file_)
        elif content:
            self.parse(content)
        self._validate()

    def __str__(self):
        keys = self.__dict__.keys() & self._defaults().keys()
        return str({x: getattr(self, x) for x in keys})

    def parse(self, definition):
        definition = yaml.load(definition)
        self._load_definition(definition)
        self._load_symbols()
        self._load_layers()
        self._load_weight_tying()
        assert not definition, (
            'unknown properties {} in problem definition'
            .format(', '.join(definition.keys())))

    def _load_definition(self, definition):
        # The empty dictionary causes defaults to be loaded even if the
        # definition is None.
        if not definition:
            definition = {}
        for name, default in self._defaults().items():
            type_ = type(default)
            self.__dict__[name] = type_(definition.pop(name, default))

    def _load_symbols(self):
        # pylint: disable=attribute-defined-outside-init
        self.cost = self._find_symbol(layered.cost, self.cost)()
        self.dataset = self._find_symbol(layered.dataset, self.dataset)()

    def _load_layers(self):
        for index, layer in enumerate(self.layers):
            size, activation = layer.pop('size'), layer.pop('activation')
            activation = self._find_symbol(layered.activation, activation)
            self.layers[index] = Layer(size, activation)

    def _load_weight_tying(self):
        # pylint: disable=attribute-defined-outside-init
        self.weight_tying = [[y.split(',') for y in x]
                             for x in self.weight_tying]
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                for k, slice_ in enumerate(slices):
                    slice_ = [int(s) if s else None for s in slice_.split(':')]
                    self.weight_tying[i][j][k] = slice(*slice_)
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                assert not slices[0].start and not slices[0].step, (
                    'Ranges are not allowed in the first dimension.')
                self.weight_tying[i][j][0] = slices[0].stop

    def _find_symbol(self, module, name, fallback=None):
        """
        Find the symbol of the specified name inside the module or raise an
        exception.
        """
        if not hasattr(module, name) and fallback:
            return self._find_symbol(module, fallback, None)
        return getattr(module, name)

    def _validate(self):
        num_input = len(self.dataset.training[0].data)
        num_output = len(self.dataset.training[0].target)
        if self.layers:
            assert self.layers[0].size == num_input, (
                'the size of the input layer must match the training data')
            assert self.layers[-1].size == num_output, (
                'the size of the output layer must match the training labels')

    @staticmethod
    def _defaults():
        return {
            'cost': 'SquaredError',
            'dataset': 'Modulo',
            'layers': [],
            'epochs': 1,
            'batch_size': 1,
            'learning_rate': 0.1,
            'momentum': 0.0,
            'weight_scale': 0.1,
            'weight_mean': 0.0,
            'weight_decay': 0.0,
            'weight_tying': [],
            'evaluate_every': 1000,
        }
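
Problem also accepts inline YAML, so a definition can be tried without a file. This definition is my own example, sized to match the Modulo dataset (32 input bits, 7 classes):

from layered.problem import Problem

# Building the Problem also constructs the full Modulo dataset,
# so it takes a moment.
problem = Problem('''
dataset: Modulo
cost: SquaredError
layers:
  - {size: 32, activation: Sigmoid}
  - {size: 7, activation: Softmax}
epochs: 2
''')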

evaluation.py

Evaluates predictions: average cost and classification error rate.

import numpy as np


def compute_costs(network, weights, cost, examples):
    prediction = [network.feed(weights, x.data) for x in examples]
    costs = [cost(x, y.target).mean() for x, y in zip(prediction, examples)]
    return costs


def compute_error(network, weights, examples):
    prediction = [network.feed(weights, x.data) for x in examples]
    error = sum(bool(np.argmax(x) != np.argmax(y.target)) for x, y in
                zip(prediction, examples)) / len(examples)
    return error
gradient.py

Computes gradients: backpropagation, a numerical approximation, and batched/parallel variants.

import math
import functools
import multiprocessing
import numpy as np
from layered.network import Matrices
from layered.utility import batched


class Gradient:

    def __init__(self, network, cost):
        self.network = network
        self.cost = cost

    def __call__(self, weights, example):
        raise NotImplementedError


class Backprop(Gradient):
    """
    Use the backpropagation algorithm to efficiently determine the gradient of
    the cost function with respect to each individual weight.
    """

    def __call__(self, weights, example):
        prediction = self.network.feed(weights, example.data)
        delta_output = self._delta_output(prediction, example.target)
        delta_layers = self._delta_layers(weights, delta_output)
        delta_weights = self._delta_weights(delta_layers)
        return delta_weights

    def _delta_output(self, prediction, target):
        assert len(target) == self.network.layers[-1].size
        # The derivative with respect to the output layer is computed as the
        # product of error derivative and local derivative at the layer.
        delta_cost = self.cost.delta(prediction, target)
        delta_output = self.network.layers[-1].delta(delta_cost)
        assert len(delta_cost) == len(delta_output) == len(target)
        return delta_output

    def _delta_layers(self, weights, delta_output):
        # Propagate backwards through the hidden layers but not the input
        # layer. The current weight matrix is the one to the right of the
        # current layer.
        gradient = [delta_output]
        hidden = list(zip(weights[1:], self.network.layers[1:-1]))
        assert all(x.shape[0] - 1 == len(y) for x, y in hidden)
        for weight, layer in reversed(hidden):
            delta = self._delta_layer(layer, weight, gradient[-1])
            gradient.append(delta)
        return reversed(gradient)

    def _delta_layer(self, layer, weight, above):
        # The gradient at a layer is computed as the derivative of both the
        # local activation and the weighted sum of the derivatives in the
        # deeper layer.
        backward = self.network.backward(weight, above)
        delta = layer.delta(backward)
        assert len(layer) == len(backward) == len(delta)
        return delta

    def _delta_weights(self, delta_layers):
        # The gradient with respect to the weights is computed as the gradient
        # at the target neuron multiplied by the activation of the source
        # neuron.
        gradient = Matrices(self.network.shapes)
        prev_and_delta = zip(self.network.layers[:-1], delta_layers)
        for index, (previous, delta) in enumerate(prev_and_delta):
            # We want to tweak the bias weights so we need them in the
            # gradient.
            activations = np.insert(previous.outgoing, 0, 1)
            assert activations[0] == 1
            gradient[index] = np.outer(activations, delta)
        return gradient


class NumericalGradient(Gradient):
    """
    Approximate the gradient for each weight individually by sampling the error
    function slightly above and below the current value of the weight.
    """

    def __init__(self, network, cost, distance=1e-5):
        super().__init__(network, cost)
        self.distance = distance

    def __call__(self, weights, example):
        """
        Modify each weight individually in both directions to calculate a
        numeric gradient of the weights.
        """
        # We need a copy of the weights that we can modify to evaluate the cost
        # function on.
        modified = Matrices(weights.shapes, weights.flat.copy())
        gradient = Matrices(weights.shapes)
        for i, connection in enumerate(weights):
            for j, original in np.ndenumerate(connection):
                # Sample above and below and compute costs.
                modified[i][j] = original + self.distance
                above = self._evaluate(modified, example)
                modified[i][j] = original - self.distance
                below = self._evaluate(modified, example)
                # Restore the original value so we can reuse the weight matrix
                # for the next iteration.
                modified[i][j] = original
                # Compute the numeric gradient.
                sample = (above - below) / (2 * self.distance)
                gradient[i][j] = sample
        return gradient

    def _evaluate(self, weights, example):
        prediction = self.network.feed(weights, example.data)
        cost = self.cost(prediction, example.target)
        assert cost.shape == prediction.shape
        return cost.sum()


class CheckedBackprop(Gradient):
    """
    Computes the gradient both analytically through backpropagation and
    numerically to validate the backpropagation implementation and derivatives
    of activation functions and cost functions. This is slow by its nature and
    it's recommended to validate derivatives on small networks.
    """

    def __init__(self, network, cost, distance=1e-5, tolerance=1e-8):
        self.tolerance = tolerance
        super().__init__(network, cost)
        self.analytic = Backprop(network, cost)
        self.numeric = NumericalGradient(network, cost, distance)

    def __call__(self, weights, example):
        analytic = self.analytic(weights, example)
        numeric = self.numeric(weights, example)
        distances = np.absolute(analytic.flat - numeric.flat)
        worst = distances.max()
        if worst > self.tolerance:
            print('Gradient differs by {:.2f}%'.format(100 * worst))
        else:
            print('Gradient looks good')
        return analytic


class BatchBackprop:
    """
    Calculate the average gradient over a batch of examples.
    """

    def __init__(self, network, cost):
        self.backprop = Backprop(network, cost)

    def __call__(self, weights, examples):
        gradient = Matrices(weights.shapes)
        for example in examples:
            gradient += self.backprop(weights, example)
        return gradient / len(examples)


class ParallelBackprop:
    """
    Alternative to BatchBackprop that yields the same results but utilizes
    multiprocessing to make use of more than one processor core.
    """

    def __init__(self, network, cost, workers=4):
        self.backprop = BatchBackprop(network, cost)
        self.workers = workers
        self.pool = multiprocessing.Pool(self.workers)

    def __call__(self, weights, examples):
        batch_size = int(math.ceil(len(examples) / self.workers))
        batches = list(batched(examples, batch_size))
        sizes = [len(x) / batch_size for x in batches]
        sizes = [x / sum(sizes) for x in sizes]
        assert len(batches) <= self.workers
        assert sum(sizes) == 1
        compute = functools.partial(self.backprop, weights)
        gradients = self.pool.map(compute, batches)
        return sum(x * y for x, y in zip(gradients, sizes))
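
Putting CheckedBackprop to work on a tiny network (the sizes and values are my own):

import numpy as np
from layered.network import Network, Layer, Matrices
from layered.activation import Identity, Sigmoid
from layered.cost import SquaredError
from layered.example import Example
from layered.gradient import CheckedBackprop

network = Network([Layer(2, Identity), Layer(2, Sigmoid)])
weights = Matrices(network.shapes)
weights.flat = np.random.normal(0, 0.1, len(weights.flat))
checked = CheckedBackprop(network, SquaredError())
gradient = checked(weights, Example([1, 0], [0, 1]))
# Prints 'Gradient looks good' when backprop matches the numeric estimate.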
plot.py

Plotting utilities.

# pylint: disable=wrong-import-position
import collections
import time
import warnings
import inspect
import threading
import matplotlib

# Don't call the code if Sphinx inspects the file mocking external imports.
if inspect.ismodule(matplotlib):  # noqa
    # On Mac force backend that works with threading.
    if matplotlib.get_backend() == 'MacOSX':
        matplotlib.use('TkAgg')
    # Hide matplotlib deprecation message.
    warnings.filterwarnings('ignore', category=matplotlib.cbook.mplDeprecation)
    # Ensure available interactive backend.
    if matplotlib.get_backend() not in matplotlib.rcsetup.interactive_bk:
        print('No visual backend available. Maybe you are inside a virtualenv '
              'that was created without --system-site-packages.')

import matplotlib.pyplot as plt


class Interface:

    def __init__(self, title='', xlabel='', ylabel='', style=None):
        self._style = style or {}
        self._title = title
        self._xlabel = xlabel
        self._ylabel = ylabel
        self.xdata = []
        self.ydata = []
        self.width = 0
        self.height = 0

    @property
    def style(self):
        return self._style

    @property
    def title(self):
        return self._title

    @property
    def xlabel(self):
        return self._xlabel

    @property
    def ylabel(self):
        return self._ylabel


class State:

    def __init__(self):
        self.running = False


class Window:

    def __init__(self, refresh=0.5):
        self.refresh = refresh
        self.thread = None
        self.state = State()
        self.figure = plt.figure()
        self.interfaces = []
        plt.ion()
        plt.show()

    def register(self, position, interface):
        axis = self.figure.add_subplot(
            position, title=interface.title,
            xlabel=interface.xlabel, ylabel=interface.ylabel)
        axis.get_xaxis().set_ticks([])
        line, = axis.plot(interface.xdata, interface.ydata, **interface.style)
        self.interfaces.append((axis, line, interface))

    def start(self, work):
        """
        Hand the main thread to the window and continue work in the provided
        function. A state is passed as the first argument that contains a
        `running` flag. The function is expected to exit if the flag becomes
        false. The flag can also be set to false to stop the window event loop
        and continue in the main thread after the `start()` call.
        """
        assert threading.current_thread() == threading.main_thread()
        assert not self.state.running
        self.state.running = True
        self.thread = threading.Thread(target=work, args=(self.state,))
        self.thread.start()
        while self.state.running:
            try:
                before = time.time()
                self.update()
                duration = time.time() - before
                plt.pause(max(0.001, self.refresh - duration))
            except KeyboardInterrupt:
                self.state.running = False
                self.thread.join()
                return

    def stop(self):
        """
        Close the window and stop the worker thread. The main thread will
        resume with the next command after the `start()` call.
        """
        assert threading.current_thread() == self.thread
        assert self.state.running
        self.state.running = False

    def update(self):
        """
        Redraw the figure to show changed data. This is automatically called
        after `start()` was run.
        """
        assert threading.current_thread() == threading.main_thread()
        for axis, line, interface in self.interfaces:
            line.set_xdata(interface.xdata)
            line.set_ydata(interface.ydata)
            axis.set_xlim(0, interface.width or 1, emit=False)
            axis.set_ylim(0, interface.height or 1, emit=False)
        self.figure.canvas.draw()


class Plot(Interface):

    def __init__(self, title, xlabel, ylabel, style=None, fixed=None):
        # pylint: disable=too-many-arguments, redefined-variable-type
        super().__init__(title, xlabel, ylabel, style or {})
        self.max_ = 0
        if not fixed:
            self.xdata = []
            self.ydata = []
        else:
            self.xdata = list(range(fixed))
            self.ydata = collections.deque([None] * fixed, maxlen=fixed)
            self.width = fixed

    def __call__(self, values):
        self.ydata += values
        self.max_ = max(self.max_, *values)
        self.height = 1.05 * self.max_
        while len(self.xdata) < len(self.ydata):
            self.xdata.append(len(self.xdata))
        self.width = len(self.xdata) - 1
        assert len(self.xdata) == len(self.ydata)
optimization.py

Optimization algorithms, mostly gradient-based.

class GradientDecent:
    """
    Adapt the weights in the opposite direction of the gradient to reduce the
    error.
    """

    def __call__(self, weights, gradient, learning_rate=0.1):
        return weights - learning_rate * gradient


class Momentum:
    """
    Slow down changes of direction in the gradient by aggregating previous
    values of the gradient and multiplying them in.
    """

    def __init__(self):
        self.previous = None

    def __call__(self, gradient, rate=0.9):
        gradient = gradient.copy()
        if self.previous is None:
            self.previous = gradient.copy()
        else:
            assert self.previous.shape == gradient.shape
            gradient += rate * self.previous
            self.previous = gradient.copy()
        return gradient


class WeightDecay:
    """
    Slowly moves each weight closer to zero for regularization. This can help
    the model to find simpler solutions.
    """

    def __call__(self, weights, rate=1e-4):
        return (1 - rate) * weights


class WeightTying:
    """
    Constraint groups of slices of the gradient to have the same value by
    averaging them. Should be applied to the initial weights and each gradient.
    """

    def __init__(self, *groups):
        for group in groups:
            assert group and hasattr(group, '__len__')
            assert all([isinstance(x[0], int) for x in group])
            assert all([isinstance(y, (slice, int)) for x in group for y in x])
        self.groups = groups

    def __call__(self, matrices):
        matrices = matrices.copy()
        for group in self.groups:
            slices = [matrices[slice_] for slice_ in group]
            assert all([x.shape == slices[0].shape for x in slices]), (
                'All slices within a group must have the same shape. '
                'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.')
            average = sum(slices) / len(slices)
            assert average.shape == slices[0].shape
            for slice_ in group:
                matrices[slice_] = average
        return matrices
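
How the pieces combine into one update step (scalar toy values; the Trainer below chains them in the same spirit, applying momentum and weight tying to the gradient first):

import numpy as np

decent, momentum, decay = GradientDecent(), Momentum(), WeightDecay()
weights, gradient = np.array([0.5]), np.array([0.2])
gradient = momentum(gradient, rate=0.9)                 # first call: unchanged
weights = decent(weights, gradient, learning_rate=0.1)  # 0.5 - 0.1 * 0.2 = [0.48]
weights = decay(weights, rate=1e-4)                     # shrink slightly toward zero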

trainer.py

The trainer that ties everything together.

import functools
import numpy as np
from layered.gradient import BatchBackprop, CheckedBackprop
from layered.network import Network, Matrices
from layered.optimization import (
    GradientDecent, Momentum, WeightDecay, WeightTying)
from layered.utility import repeated, batched
from layered.evaluation import compute_costs, compute_error


class Trainer:
    # pylint: disable=attribute-defined-outside-init, too-many-arguments

    def __init__(self, problem, load=None, save=None,
                 visual=False, check=False):
        self.problem = problem
        self.load = load
        self.save = save
        self.visual = visual
        self.check = check
        self._init_network()
        self._init_training()
        self._init_visualize()

    def _init_network(self):
        """Define model and initialize weights."""
        self.network = Network(self.problem.layers)
        self.weights = Matrices(self.network.shapes)
        if self.load:
            loaded = np.load(self.load)
            assert loaded.shape == self.weights.shape, (
                'weights to load must match problem definition')
            self.weights.flat = loaded
        else:
            self.weights.flat = np.random.normal(
                self.problem.weight_mean, self.problem.weight_scale,
                len(self.weights.flat))

    def _init_training(self):
        # pylint: disable=redefined-variable-type
        """Classes needed during training."""
        if self.check:
            self.backprop = CheckedBackprop(self.network, self.problem.cost)
        else:
            self.backprop = BatchBackprop(self.network, self.problem.cost)
        self.momentum = Momentum()
        self.decent = GradientDecent()
        self.decay = WeightDecay()
        self.tying = WeightTying(*self.problem.weight_tying)
        self.weights = self.tying(self.weights)

    def _init_visualize(self):
        if not self.visual:
            return
        from layered.plot import Window, Plot
        self.plot_training = Plot(
            'Training', 'Examples', 'Cost', fixed=1000,
            style={'linestyle': '', 'marker': '.'})
        self.plot_testing = Plot('Testing', 'Time', 'Error')
        self.window = Window()
        self.window.register(211, self.plot_training)
        self.window.register(212, self.plot_testing)

    def __call__(self):
        """Train the model and visualize progress."""
        print('Start training')
        repeats = repeated(self.problem.dataset.training, self.problem.epochs)
        batches = batched(repeats, self.problem.batch_size)
        if self.visual:
            self.window.start(functools.partial(self._train_visual, batches))
        else:
            self._train(batches)

    def _train(self, batches):
        for index, batch in enumerate(batches):
            try:
                self._batch(index, batch)
            except KeyboardInterrupt:
                print('\nAborted')
                return
        print('Done')

    def _train_visual(self, batches, state):
        for index, batch in enumerate(batches):
            if not state.running:
                print('\nAborted')
                return
            self._batch(index, batch)
        print('Done')
        input('Press any key to close window')
        state.running = False

    def _batch(self, index, batch):
        if self.check:
            assert len(batch) == 1
            gradient = self.backprop(self.weights, batch[0])
        else:
            gradient = self.backprop(self.weights, batch)
        gradient = self.momentum(gradient, self.problem.momentum)
        gradient = self.tying(gradient)
        self.weights = self.decent(
            self.weights, gradient, self.problem.learning_rate)
        self.weights = self.decay(self.weights, self.problem.weight_decay)
        self._visualize(batch)
        self._evaluate(index)

    def _visualize(self, batch):
        if not self.visual:
            return
        costs = compute_costs(
            self.network, self.weights, self.problem.cost, batch)
        self.plot_training(costs)

    def _evaluate(self, index):
        if not self._every(self.problem.evaluate_every,
                           self.problem.batch_size, index):
            return
        if self.save:
            np.save(self.save, self.weights)
        error = compute_error(
            self.network, self.weights, self.problem.dataset.testing)
        print('Batch {} test error {:.2f}%'.format(index, 100 * error))
        if self.visual:
            self.plot_testing([error])

    @staticmethod
    def _every(times, step_size, index):
        """
        Given a loop over batches of an iterable and an operation that should
        be performed every few elements. Determine whether the operation should
        be called for the current index.
        """
        current = index * step_size
        step = current // times * times
        reached = current >= step
        overshot = current >= step + step_size
        return current and reached and not overshot
main.py
import os
import argparse
from layered.problem import Problem
from layered.trainer import Trainer


def main():
    parser = argparse.ArgumentParser('layered')
    parser.add_argument(
        'problem',
        help='path to the YAML problem definition')
    parser.add_argument(
        '-v', '--visual', action='store_true',
        help='show a diagram of training costs')
    parser.add_argument(
        '-l', '--load', default=None,
        help='path to load the weights from at startup')
    parser.add_argument(
        '-s', '--save', default=None,
        help='path to dump the learned weights at each evaluation')
    parser.add_argument(
        '-c', '--check', action='store_true',
        help='whether to activate gradient checking')
    args = parser.parse_args()

    print('Problem', os.path.split(args.problem)[1])
    problem = Problem(args.problem)
    trainer = Trainer(
        problem, args.load, args.save, args.visual, args.check)
    trainer()


if __name__ == '__main__':
    main()

And with main.py, the whole pipeline is complete.
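
To train from the command line, save a problem definition as YAML and run something like: python main.py problem.yaml -v (the file name here is hypothetical; -v opens the live training plot).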
