










import sys
! pip install muffnn==1.2.0


! python setup.py build develop


# Import the package
from muffnn import MLPClassifier
# NOTE(review): load_some_data() is not defined anywhere in this file; it
# looks like a placeholder for the reader's own data loading -- confirm.
X, y = load_some_data()
# Build a classifier with its default settings and fit it on the data.
mlp = MLPClassifier()
mlp.fit(X, y)
# NOTE(review): load_some_unlabeled_data() is likewise undefined here.
X_new = load_some_unlabeled_data()
# Predict targets for the new inputs with the fitted classifier.
y_pred = mlp.predict(X_new)


import pickle
# Serialize an object into the binary file 'est.pkl'.
# NOTE(review): `est` is not defined in this file; presumably it stands for
# a fitted estimator such as `mlp` above -- confirm before running.
with open('est.pkl', 'wb') as fp:
    pickle.dump(est, fp)
  • pickle.dumps() serializes the object obj and returns it as a bytes object
  • pickle.loads() reads the pickled object back from a bytes object



import numpy as np
class Example:
    """
    Immutable class representing one example in a dataset.

    Both the data and the target are converted to float numpy arrays on
    construction and exposed through the read-only `data` and `target`
    properties.
    """
    __slots__ = ('_data', '_target')

    def __init__(self, data, target):
        self._data = np.array(data, dtype=float)
        self._target = np.array(target, dtype=float)

    @property
    def data(self):
        """Input values of the example as a float array."""
        return self._data

    @property
    def target(self):
        """Expected output values of the example as a float array."""
        return self._target

    def __getstate__(self):
        # The class uses __slots__, so the state is spelled out explicitly
        # instead of relying on an instance __dict__.
        return {'data': self.data, 'target': self.target}

    def __setstate__(self, state):
        self._data = state['data']
        self._target = state['target']

    def __repr__(self):
        # round(a, b) rounds a to b decimal places.
        data = ' '.join(str(round(x, 2)) for x in self.data)
        target = ' '.join(str(round(x, 2)) for x in self.target)
        return '({})->({})'.format(data, target)


import os
import errno
import functools
import itertools

def repeated(iterable, times):
    """
    Yield the elements of `iterable`, going over the whole iterable `times`
    times in a row.

    The iterable is iterated anew on every round, so an already exhausted
    one-shot iterator contributes nothing on later rounds.
    """
    # The underscore marks a throwaway loop variable that is never read.
    for _ in range(times):
        # Hand out the elements one by one; the generator pauses at every
        # yield until the consumer asks for the next value.
        for item in iterable:
            yield item

def batched(iterable, size):
    """
    Split `iterable` into consecutive lists of `size` elements.

    Every yielded batch has exactly `size` elements except possibly the last
    one, which holds the remaining elements. Nothing is yielded for an empty
    iterable.
    """
    batch = []
    for element in iterable:
        # Collect the element first; without this the batch stays empty and
        # the generator never yields anything.
        batch.append(element)
        if len(batch) == size:
            yield batch
            batch = []
    # Emit the incomplete trailing batch, if any.
    if batch:
        yield batch

def averaged(callable_, batch):
    """
    Apply `callable_` to every element of `batch` and return the mean of the
    results.

    The results only need to support addition with each other and division
    by an integer, so numbers and numpy arrays both work.

    Raises:
        ValueError: If `batch` is empty, since the mean is undefined.
    """
    if len(batch) == 0:
        raise ValueError('cannot average over an empty batch')
    overall = None
    for element in batch:
        current = callable_(element)
        # Compare against None explicitly: the truth value of a numpy array
        # with several elements is ambiguous and raises an error.
        overall = current if overall is None else overall + current
    return overall / len(batch)

def listify(fn=None, wrapper=list):
    """
    Decorator that passes the return value of the decorated function through
    `wrapper`, by default turning a generator function into one that returns
    a list.

    Can be used bare (`@listify`) or with arguments
    (`@listify(wrapper=tuple)`).

    From http://stackoverflow.com/a/12377059/1079110
    """
    def listify_return(fn):
        # Preserve the name and docstring of the decorated function.
        @functools.wraps(fn)
        def listify_helper(*args, **kw):
            return wrapper(fn(*args, **kw))
        return listify_helper

    # Without a function we were called with arguments and must return the
    # actual decorator.
    if fn is None:
        return listify_return
    return listify_return(fn)

def ensure_folder(path):
    """
    Create the directory `path` including missing parents. An already
    existing path is not an error; any other OSError is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            return
        raise

def hstack_lines(blocks, sep=' '):
    """
    Place multi-line text blocks next to each other.

    Every block becomes one column that is as wide as its longest line.
    Cells are right-aligned, each cell is followed by `sep`, and each output
    row ends with a newline. Shorter blocks are padded with empty cells at
    the bottom.
    """
    columns = [text.split('\n') for text in blocks]
    height = max(len(column) for column in columns)
    widths = [max(len(line) for line in column) for column in columns]
    rows = []
    for row in range(height):
        cells = []
        for column, width in zip(columns, widths):
            content = column[row] if row < len(column) else ''
            # Right-align the cell to the column width, padding with spaces.
            cells.append(content.rjust(width, ' ') + sep)
        rows.append(''.join(cells) + '\n')
    return ''.join(rows)

def pairwise(iterable):
    """
    Return an iterator over consecutive overlapping pairs of `iterable`:
    (s0, s1), (s1, s2), (s2, s3), ...
    """
    # tee() provides two independent iterators over the same elements, so
    # the input can be walked twice even if it is a one-shot generator.
    current, following = itertools.tee(iterable)
    # Move the second iterator one element ahead; the default of None keeps
    # an empty input from raising StopIteration.
    next(following, None)
    return zip(current, following)




import array
import os
import shutil
import struct
import gzip
from urllib.request import urlopen
import numpy as np
from layered.example import Example
from layered.utility import ensure_folder

class Dataset:
    """
    Base class for datasets that are downloaded, parsed and optionally cached
    on disk.

    Subclasses list the files to download in `urls`, implement `parse()` and
    may set `cache` to False to skip the on-disk cache. After construction
    the examples are available as `training` and `testing`.
    """

    urls = []
    cache = True

    def __init__(self):
        cache = type(self).cache
        if cache and self._is_cached():
            print('Load cached dataset')
            self.load()
        else:
            filenames = [self.download(x) for x in type(self).urls]
            self.training, self.testing = self.parse(*filenames)
            if cache:
                self.dump()

    @classmethod
    def folder(cls):
        """
        Return the per-dataset directory below the home folder, creating it
        if necessary so that downloads and cache files can be written.
        """
        name = cls.__name__.lower()
        home = os.path.expanduser('~')
        folder = os.path.join(home, '.layered/dataset', name)
        ensure_folder(folder)
        return folder

    def parse(self):
        """
        Subclass responsibility. The filenames of downloaded files will be
        passed as individual parameters to this function. Therefore, it must
        accept as many parameters as provided class-site urls. Should return a
        tuple of training examples and testing examples.
        """
        raise NotImplementedError

    def dump(self):
        """Write the training and testing examples to the cache files."""
        np.save(self._training_path(), self.training)
        np.save(self._testing_path(), self.testing)

    def load(self):
        """Read the training and testing examples from the cache files."""
        # The examples are arbitrary Python objects, which numpy stores via
        # pickle and refuses to load unless explicitly allowed. Pickle can
        # execute code, so only cache files written by dump() are expected
        # at these paths.
        self.training = np.load(self._training_path(), allow_pickle=True)
        self.testing = np.load(self._testing_path(), allow_pickle=True)

    def download(self, url):
        """
        Download `url` into the dataset folder and return the local filename.
        """
        _, filename = os.path.split(url)
        filename = os.path.join(self.folder(), filename)
        print('Download', filename)
        with urlopen(url) as response, open(filename, 'wb') as file_:
            shutil.copyfileobj(response, file_)
        return filename

    @staticmethod
    def split(examples, ratio=0.8):
        """
        Utility function that can be used within the parse() implementation of
        sub classes to split a list of example into two lists for training and
        testing. The first `ratio` share of the examples is used for training.
        """
        split = int(ratio * len(examples))
        return examples[:split], examples[split:]

    def _is_cached(self):
        if not os.path.exists(self._training_path()):
            return False
        if not os.path.exists(self._testing_path()):
            return False
        return True

    def _training_path(self):
        return os.path.join(self.folder(), 'training.npy')

    def _testing_path(self):
        return os.path.join(self.folder(), 'testing.npy')

class Test(Dataset):
    """
    Tiny uncached dataset of identical examples that map the input
    [1, 2, 3] to the target [1, 2, 3].
    """

    cache = False

    def __init__(self, amount=10):
        self.amount = amount
        # The base constructor calls parse() and fills `training` and
        # `testing`; without it the dataset would stay empty.
        super().__init__()

    def parse(self):
        examples = [Example([1, 2, 3], [1, 2, 3]) for _ in range(self.amount)]
        return self.split(examples)

class Regression(Dataset):
    """
    Synthetically generated dataset for regression. The task is to predict the
    sum and product of all the input values. All values are normalized between
    zero and one.
    """

    cache = False

    def __init__(self, amount=10000, inputs=10):
        self.amount = amount
        self.inputs = inputs
        # The base constructor calls parse() and fills `training` and
        # `testing`; without it the dataset would stay empty.
        super().__init__()

    def parse(self):
        data = np.random.rand(self.amount, self.inputs)
        # Scale both targets by their maximum so they do not exceed one.
        products = np.prod(data, axis=1)
        products = products / np.max(products)
        sums = np.sum(data, axis=1)
        sums = sums / np.max(sums)
        targets = np.column_stack([sums, products])
        examples = [Example(x, y) for x, y in zip(data, targets)]
        return self.split(examples)

class Modulo(Dataset):
    """
    Synthetically generated classification dataset. The task is to predict the
    modulo classes of random integers encoded as bit arrays of length 32.
    """

    cache = False

    def __init__(self, amount=60000, inputs=32, classes=7):
        self.amount = amount
        self.inputs = inputs
        self.classes = classes
        # The base constructor calls parse() and fills `training` and
        # `testing`; without it the dataset would stay empty.
        super().__init__()

    def parse(self):
        # NOTE(review): the upper bound is inputs ** 2 - 1, so with 32 inputs
        # only the lowest ten bits are ever set. If the full bit width was
        # intended this should probably be 2 ** inputs - 1 -- confirm.
        data = np.random.randint(0, self.inputs ** 2 - 1, self.amount)
        mods = np.mod(data, self.classes)
        # One-hot encode the modulo class of every number.
        targets = np.zeros((self.amount, self.classes))
        for index, mod in enumerate(mods):
            targets[index][mod] = 1
        # Expand every integer into its bits, least significant bit first.
        data = (((data[:, None] & (1 << np.arange(self.inputs)))) > 0)
        examples = [Example(x, y) for x, y in zip(data, targets)]
        return self.split(examples)

class Mnist(Dataset):
    """
    The MNIST database of handwritten digits, available from this page, has a
    training set of 60,000 examples, and a test set of 10,000 examples. It is a
    subset of a larger set available from NIST. The digits have been
    size-normalized and centered in a fixed-size image. It is a good database
    for people who want to try learning techniques and pattern recognition
    methods on real-world data while spending minimal efforts on preprocessing
    and formatting. (from http://yann.lecun.com/exdb/mnist/)
    """

    # NOTE(review): the list of URLs was missing from this file. These are
    # the four archive names published on the page cited above, in the order
    # parse() expects them -- confirm that the host still serves them.
    urls = [
        'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
    ]

    def parse(self, train_x, train_y, test_x, test_y):
        # pylint: disable=arguments-differ
        training = list(self.read(train_x, train_y))
        testing = list(self.read(test_x, test_y))
        return training, testing

    @staticmethod
    def read(data, labels):
        """
        Yield one Example per image from a gzipped image file and the
        matching gzipped label file. Pixels are scaled by 1/255 and labels
        are one-hot encoded over ten classes.
        """
        # Header: four big-endian unsigned ints, of which the last three are
        # used as image count, rows and columns.
        with gzip.open(data, 'rb') as images:
            _, size, rows, cols = struct.unpack('>IIII', images.read(16))
            image_bin = array.array('B', images.read())

        # Header: two big-endian unsigned ints, the second being the count.
        with gzip.open(labels, 'rb') as label_file:
            _, size2 = struct.unpack('>II', label_file.read(8))
            assert size == size2
            label_bin = array.array('B', label_file.read())

        pixels = rows * cols
        for i in range(size):
            image = image_bin[i * pixels:(i + 1) * pixels]
            image = np.array(image).reshape(pixels) / 255
            target = np.zeros(10)
            target[label_bin[i]] = 1
            yield Example(image, target)



import numpy as np

class Cost:
    """
    Interface of cost functions. Subclasses implement both the cost itself
    and its derivative with respect to the prediction.
    """

    def __call__(self, prediction, target):
        """Return the cost of `prediction` given `target`."""
        raise NotImplementedError

    def delta(self, prediction, target):
        """Return the derivative of the cost with respect to `prediction`."""
        raise NotImplementedError

class SquaredError(Cost):
    """
    Fast and simple cost function.
    """

    def __call__(self, prediction, target):
        # Half the squared difference, so the derivative is the difference.
        return (prediction - target) ** 2 / 2

    def delta(self, prediction, target):
        return prediction - target

class CrossEntropy(Cost):
    """
    Logistic cost function used for classification tasks. Learns faster in the
    beginning than SquaredError because large errors are penalized
    exponentially. This makes sense in classification since only the best class
    will be the predicted one.
    """

    def __init__(self, epsilon=1e-11):
        # Small constant that keeps logarithms and divisions away from zero.
        self.epsilon = epsilon

    def __call__(self, prediction, target):
        # Clip so that neither log(0) nor log(1 - 1) is ever evaluated.
        clipped = np.clip(prediction, self.epsilon, 1 - self.epsilon)
        cost = target * np.log(clipped) + (1 - target) * np.log(1 - clipped)
        return -cost

    def delta(self, prediction, target):
        # prediction * (1 - prediction), bounded from below by epsilon to
        # avoid dividing by zero for saturated predictions.
        denominator = np.maximum(prediction - prediction ** 2, self.epsilon)
        delta = (prediction - target) / denominator
        assert delta.shape == target.shape == prediction.shape
        return delta



import numpy as np

class Activation:
    """
    Interface of activation functions applied to the input of a layer.
    """

    def __call__(self, incoming):
        """Return the outgoing activation for the `incoming` values."""
        raise NotImplementedError

    def delta(self, incoming, outgoing, above):
        """
        Compute the derivative of the cost with respect to the input of this
        activation function. Outgoing is what this function returned in the
        forward pass and above is the derivative of the cost with respect to
        the outgoing activation.
        """
        raise NotImplementedError

class Identity(Activation):
    """
    Activation that passes its input through unchanged.
    """

    def __call__(self, incoming):
        return incoming

    def delta(self, incoming, outgoing, above):
        # The local derivative is one everywhere.
        local = np.ones(incoming.shape, dtype=float)
        return local * above

class Sigmoid(Activation):
    """
    Logistic activation 1 / (1 + exp(-x)).
    """

    def __call__(self, incoming):
        denominator = 1 + np.exp(-incoming)
        return 1 / denominator

    def delta(self, incoming, outgoing, above):
        # The derivative is expressed through the forward-pass output.
        local = outgoing * (1 - outgoing)
        return local * above

class Relu(Activation):
    """
    Rectified linear activation: negative inputs are replaced by zero.
    """

    def __call__(self, incoming):
        rectified = np.maximum(incoming, 0)
        return rectified

    def delta(self, incoming, outgoing, above):
        # One where the input was strictly positive and zero elsewhere.
        is_active = np.greater(incoming, 0)
        return is_active.astype(float) * above

class Softmax(Activation):
    """
    Exponentiates the input and normalizes by the sum of all exponentials.
    """

    def __call__(self, incoming):
        # Subtracting the maximum doesn't change the result but prevents
        # overflows in the exponential.
        shifted = incoming - np.max(incoming)
        exps = np.exp(shifted)
        return exps / exps.sum()

    def delta(self, incoming, outgoing, above):
        weighted = outgoing * above
        # Sum over the last axis and keep it so the result broadcasts back.
        total = weighted.sum(axis=weighted.ndim - 1, keepdims=True)
        weighted -= outgoing * total
        return weighted

class SparseField(Activation):
    """
    Activation that arranges the layer as a square field and repeatedly
    activates the strongest remaining neuron while silencing the square
    neighbourhood around it.
    """

    def __init__(self, inhibition=0.05, leaking=0.0):
        # Share of the layer that determines the inhibition radius.
        self.inhibition = inhibition
        # Factor applied to the input as a lower bound of the output.
        self.leaking = leaking

    def __call__(self, incoming):
        count = len(incoming)
        length = int(np.sqrt(count))
        assert length ** 2 == count, 'layer size must be a square'
        field = incoming.copy().reshape((length, length))
        radius = int(np.sqrt(self.inhibition * count)) // 2
        assert radius, 'no inhibition due to small factor'
        outgoing = np.zeros(field.shape)
        while True:
            x, y = np.unravel_index(field.argmax(), field.shape)
            # Stop once no positive input is left; every winner zeroes its
            # own cell below, so this is what ends the loop.
            if field[x, y] <= 0:
                break
            outgoing[x, y] = 1
            # Silence the square neighbourhood around the winner, clipped to
            # the borders of the field.
            surrounding = np.s_[
                max(x - radius, 0):min(x + radius + 1, length),
                max(y - radius, 0):min(y + radius + 1, length)]
            field[surrounding] = 0
            assert field[x, y] == 0
        outgoing = outgoing.reshape(count)
        outgoing = np.maximum(outgoing, self.leaking * incoming)
        return outgoing

    def delta(self, incoming, outgoing, above):
        # Only neurons with a positive output pass the gradient on.
        delta = np.greater(outgoing, 0).astype(float)
        return delta * above

class SparseRange(Activation):
    """
    E%-Max Winner-Take-All.

    Binary activation. First, the activation function is applied. Then all
    neurons within the specified range below the strongest neuron are set to
    one. All others are set to zero. The gradient is the one of the activation
    function for active neurons and zero otherwise.

    See: A Second Function of Gamma Frequency Oscillations: An E%-Max
    Winner-Take-All Mechanism Selects Which Cells Fire. (2009)
    """

    def __init__(self, range_=0.3, function=Sigmoid()):
        # The default activation instance is created once and shared between
        # all instances that don't pass their own.
        assert 0 < range_ < 1
        self._range = range_
        self._function = function

    def __call__(self, incoming):
        incoming = self._function(incoming)
        threshold = self._threshold(incoming)
        active = (incoming >= threshold)
        outgoing = np.zeros(incoming.shape)
        outgoing[active] = 1
        return outgoing

    def delta(self, incoming, outgoing, above):
        # The binary output masks the gradient of the wrapped function.
        return outgoing * self._function.delta(incoming, outgoing, above)

    def _threshold(self, incoming):
        # Everything within the top `range_` share of the span between the
        # weakest and the strongest activation counts as active.
        min_, max_ = incoming.min(), incoming.max()
        threshold = min_ + (max_ - min_) * (1 - self._range)
        return threshold



import operator
import numpy as np

class Layer:
    """
    A layer of neurons that remembers its incoming and outgoing activations.

    `activation` is a class or factory that is called without arguments to
    create the activation function of the layer.
    """

    def __init__(self, size, activation):
        assert size and isinstance(size, int)
        self.size = size
        self.activation = activation()
        self.incoming = np.zeros(size)
        self.outgoing = np.zeros(size)
        assert len(self.incoming) == len(self.outgoing) == self.size

    def __len__(self):
        assert len(self.incoming) == len(self.outgoing)
        return len(self.incoming)

    def __repr__(self):
        return repr(self.outgoing)

    def __str__(self):
        # One row per neuron showing its incoming and outgoing value.
        table = zip(self.incoming, self.outgoing)
        rows = [' /'.join('{: >6.3f}'.format(x) for x in row) for row in table]
        return '\n'.join(rows)

    def apply(self, incoming):
        """
        Store the incoming activation, apply the activation function and store
        the result as outgoing activation.
        """
        assert len(incoming) == self.size
        self.incoming = incoming
        outgoing = self.activation(self.incoming)
        assert len(outgoing) == self.size
        self.outgoing = outgoing

    def delta(self, above):
        """
        The derivative of the activation function at the current state.
        """
        return self.activation.delta(self.incoming, self.outgoing, above)

class Matrices:
    """
    A list of two-dimensional matrices stored in one flat array.

    `shapes` is a list of (rows, columns) tuples. Indexing with an integer
    returns the matrix at that position reshaped from the flat array,
    slices return lists of matrices, and tuple indices address elements
    inside one matrix. Arithmetic operators work element-wise on the flat
    array, and unknown attributes are forwarded to it.
    """

    def __init__(self, shapes, elements=None):
        self.shapes = shapes
        length = sum(x * y for x, y in shapes)
        if elements is not None:
            assert len(elements) == length
            # Copy so that later changes don't leak back to the caller.
            elements = elements.copy()
        else:
            elements = np.zeros(length)
        self.flat = elements

    def __len__(self):
        return len(self.shapes)

    def __getitem__(self, index):
        # Tuple index: the first entry selects the matrix, the remaining
        # entries index into that matrix.
        if hasattr(index, '__len__'):
            assert isinstance(index[0], int)
            return self[index[0]][index[1:]]
        if isinstance(index, slice):
            return [self[i] for i in self._range_from_slice(index)]
        slice_ = self._locate(index)
        data = self.flat[slice_]
        data = data.reshape(self.shapes[index])
        return data

    def __setitem__(self, index, data):
        if hasattr(index, '__len__'):
            assert isinstance(index[0], int)
            self[index[0]][index[1:]] = data
            # Done; a tuple must not fall through to the integer handling.
            return
        if isinstance(index, slice):
            for i in self._range_from_slice(index):
                self[i] = data
            return
        slice_ = self._locate(index)
        data = data.reshape(slice_.stop - slice_.start)
        self.flat[slice_] = data

    def __getattr__(self, name):
        # Tunnel not found properties to the underlying array.
        flat = super().__getattribute__('flat')
        return getattr(flat, name)

    def __setattr__(self, name, value):
        # Ensure that the size of the underlying array doesn't change. The
        # first assignment in the constructor has nothing to compare with.
        if name == 'flat' and 'flat' in self.__dict__:
            assert value.shape == self.flat.shape
        super().__setattr__(name, value)

    def copy(self):
        return Matrices(self.shapes, self.flat.copy())

    def __add__(self, other):
        return self._operation(other, lambda x, y: x + y)

    def __sub__(self, other):
        return self._operation(other, lambda x, y: x - y)

    def __mul__(self, other):
        return self._operation(other, lambda x, y: x * y)

    def __truediv__(self, other):
        return self._operation(other, lambda x, y: x / y)

    __rmul__ = __mul__

    __radd__ = __add__

    def _operation(self, other, operation):
        # Operate on the flat array of the other operand if it has one and
        # on the operand itself otherwise, for example for plain numbers.
        try:
            other = other.flat
        except AttributeError:
            pass
        return Matrices(self.shapes, operation(self.flat, other))

    def _locate(self, index):
        """Return the slice of the flat array that holds matrix `index`."""
        assert isinstance(index, int), (
            'Only single elemente can be indiced in the first dimension.')
        if index < 0:
            index = len(self.shapes) + index
        if not 0 <= index < len(self.shapes):
            raise IndexError
        offset = sum(x * y for x, y in self.shapes[:index])
        length = operator.mul(*self.shapes[index])
        return slice(offset, offset + length)

    def _range_from_slice(self, slice_):
        # slice.indices() resolves missing, negative and out-of-range bounds
        # the same way list slicing does.
        return range(*slice_.indices(len(self.shapes)))

    def __str__(self):
        return str(len(self.flat)) + str(self.flat)

class Network:
    """
    A feed-forward network defined by a list of layers. The weights are not
    stored in the network but passed to `feed()`.
    """

    def __init__(self, layers):
        self.layers = layers
        self.sizes = tuple(layer.size for layer in self.layers)
        # Weight matrices have the dimensions of the two layers around them.
        # Also, there is an additional bias input to each weight matrix.
        self.shapes = [
            (x + 1, y) for x, y in zip(self.sizes[:-1], self.sizes[1:])]
        # Weight matrices are in between the layers.
        assert len(self.shapes) == len(self.layers) - 1

    def feed(self, weights, data):
        """
        Evaluate the network with alternative weights on the input data and
        return the output activation.
        """
        assert len(data) == self.layers[0].size
        # The input layer receives the data directly.
        self.layers[0].apply(data)
        # Propagate trough the remaining layers.
        connections = zip(self.layers[:-1], weights, self.layers[1:])
        for previous, weight, current in connections:
            incoming = self.forward(weight, previous.outgoing)
            current.apply(incoming)
        # Return the activations of the output layer.
        return self.layers[-1].outgoing

    @staticmethod
    def forward(weight, activations):
        """Propagate activations through a weight matrix to the next layer."""
        # Add bias input of one.
        activations = np.insert(activations, 0, 1)
        assert activations[0] == 1
        right = activations.dot(weight)
        return right

    @staticmethod
    def backward(weight, activations):
        """Propagate values back through a weight matrix to the layer before."""
        left = activations.dot(weight.transpose())
        # Don't expose the bias input of one.
        left = left[1:]
        return left
import os
import yaml
import layered.cost
import layered.dataset
import layered.activation
from layered.network import Layer

class Problem:
    """
    Definition of a learning problem: cost function, dataset, layers and
    training parameters, loaded from YAML on top of the defaults.
    """

    def __init__(self, content=None):
        """
        Construct a problem. If content is specified, try to load it as a YAML
        path and otherwise treat it as an inline YAML string.
        """
        # NOTE(review): the statements of this constructor were missing from
        # the file. They are reconstructed from the docstring and from the
        # otherwise unused parse() and _validate() -- confirm.
        if content and os.path.isfile(content):
            with open(content) as file_:
                self.parse(file_)
        elif content:
            self.parse(content)
        else:
            # Without content only the defaults are loaded.
            self.parse('')
        self._validate()

    def __str__(self):
        keys = self.__dict__.keys() & self._defaults().keys()
        return str({x: getattr(self, x) for x in keys})

    def parse(self, definition):
        """
        Load the problem from a YAML string or an open YAML file.
        """
        # safe_load() only builds plain Python objects, which is all a
        # problem definition needs, and doesn't require an explicit Loader.
        definition = yaml.safe_load(definition)
        # NOTE(review): the calls to the loading steps were missing from the
        # file and are reconstructed here. The definition has to be loaded
        # first because the other steps convert the loaded attributes.
        self._load_definition(definition)
        self._load_symbols()
        self._load_layers()
        self._load_weight_tying()
        # Known properties were popped while loading the definition.
        assert not definition, (
            'unknown properties {} in problem definition'
            .format(', '.join(definition.keys())))

    def _load_definition(self, definition):
        # The empty dictionary causes defaults to be loaded even if the
        # definition is None.
        if not definition:
            definition = {}
        for name, default in self._defaults().items():
            # Convert the value to the type of its default.
            type_ = type(default)
            self.__dict__[name] = type_(definition.pop(name, default))

    def _load_symbols(self):
        # pylint: disable=attribute-defined-outside-init
        # Replace the names by instances of the classes they refer to.
        self.cost = self._find_symbol(layered.cost, self.cost)()
        self.dataset = self._find_symbol(layered.dataset, self.dataset)()

    def _load_layers(self):
        for index, layer in enumerate(self.layers):
            size, activation = layer.pop('size'), layer.pop('activation')
            activation = self._find_symbol(layered.activation, activation)
            self.layers[index] = Layer(size, activation)

    def _load_weight_tying(self):
        # pylint: disable=attribute-defined-outside-init
        # Every entry is a comma separated list of slices, one per dimension.
        self.weight_tying = [[y.split(',') for y in x]
                             for x in self.weight_tying]
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                for k, slice_ in enumerate(slices):
                    slice_ = [int(s) if s else None for s in slice_.split(':')]
                    self.weight_tying[i][j][k] = slice(*slice_)
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                assert not slices[0].start and not slices[0].step, (
                    'Ranges are not allowed in the first dimension.')
                # A single number parses as slice(None, number), so the
                # plain index is its stop value.
                self.weight_tying[i][j][0] = slices[0].stop

    def _find_symbol(self, module, name, fallback=None):
        """
        Find the symbol of the specified name inside the module or raise an
        AttributeError. If the name is missing and a fallback name is given,
        the fallback is looked up instead.
        """
        if not hasattr(module, name) and fallback:
            return self._find_symbol(module, fallback, None)
        return getattr(module, name)

    def _validate(self):
        num_input = len(self.dataset.training[0].data)
        num_output = len(self.dataset.training[0].target)
        if self.layers:
            assert self.layers[0].size == num_input, (
                'the size of the input layer must match the training data')
            assert self.layers[-1].size == num_output, (
                'the size of the output layer must match the training labels')

    @staticmethod
    def _defaults():
        # NOTE(review): the end of this dictionary was missing from the file;
        # there may have been further entries -- confirm.
        return {
            'cost': 'SquaredError',
            'dataset': 'Modulo',
            'layers': [],
            'epochs': 1,
            'batch_size': 1,
            'learning_rate': 0.1,
            'momentum': 0.0,
            'weight_scale': 0.1,
            'weight_mean': 0.0,
            'weight_decay': 0.0,
            'weight_tying': [],
            'evaluate_every': 1000,
        }



import numpy as np

def compute_costs(network, weights, cost, examples):
    """
    Return the mean cost of every example as a list, evaluating the network
    with the given weights.
    """
    outputs = [network.feed(weights, example.data) for example in examples]
    costs = []
    for output, example in zip(outputs, examples):
        costs.append(cost(output, example.target).mean())
    return costs

def compute_error(network, weights, examples):
    """
    Return the share of examples for which the strongest output of the
    network differs from the strongest entry of the target.
    """
    outputs = [network.feed(weights, example.data) for example in examples]
    mistakes = 0
    for output, example in zip(outputs, examples):
        if np.argmax(output) != np.argmax(example.target):
            mistakes += 1
    return mistakes / len(examples)


import math
import functools
import multiprocessing
import numpy as np
from layered.network import Matrices
from layered.utility import batched

class Gradient:
    """
    Interface of gradient computations. An instance is bound to a network
    and a cost function and is called with weights and an example.
    """

    def __init__(self, network, cost):
        self.cost = cost
        self.network = network

    def __call__(self, weights, example):
        """Return the gradient of the cost with respect to the weights."""
        raise NotImplementedError

class Backprop(Gradient):
    """
    Use the backpropagation algorithm to efficiently determine the gradient of
    the cost function with respect to each individual weight.
    """

    def __call__(self, weights, example):
        prediction = self.network.feed(weights, example.data)
        delta_output = self._delta_output(prediction, example.target)
        delta_layers = self._delta_layers(weights, delta_output)
        delta_weights = self._delta_weights(delta_layers)
        return delta_weights

    def _delta_output(self, prediction, target):
        assert len(target) == self.network.layers[-1].size
        # The derivative with respect to the output layer is computed as the
        # product of error derivative and local derivative at the layer.
        delta_cost = self.cost.delta(prediction, target)
        delta_output = self.network.layers[-1].delta(delta_cost)
        assert len(delta_cost) == len(delta_output) == len(target)
        return delta_output

    def _delta_layers(self, weights, delta_output):
        # Propagate backwards through the hidden layers but not the input
        # layer. The current weight matrix is the one to the right of the
        # current layer.
        gradient = [delta_output]
        hidden = list(zip(weights[1:], self.network.layers[1:-1]))
        assert all(x.shape[0] - 1 == len(y) for x, y in hidden)
        for weight, layer in reversed(hidden):
            delta = self._delta_layer(layer, weight, gradient[-1])
            # Keep the delta so the next layer towards the input builds on
            # it; otherwise only the output delta would be returned.
            gradient.append(delta)
        # The deltas were collected from the output towards the input.
        return reversed(gradient)

    def _delta_layer(self, layer, weight, above):
        # The gradient at a layer is computed as the derivative of both the
        # local activation and the weighted sum of the derivatives in the
        # deeper layer.
        backward = self.network.backward(weight, above)
        delta = layer.delta(backward)
        assert len(layer) == len(backward) == len(delta)
        return delta

    def _delta_weights(self, delta_layers):
        # The gradient with respect to the weights is computed as the gradient
        # at the target neuron multiplied by the activation of the source
        # neuron.
        gradient = Matrices(self.network.shapes)
        prev_and_delta = zip(self.network.layers[:-1], delta_layers)
        for index, (previous, delta) in enumerate(prev_and_delta):
            # We want to tweak the bias weights so we need them in the
            # gradient.
            activations = np.insert(previous.outgoing, 0, 1)
            assert activations[0] == 1
            gradient[index] = np.outer(activations, delta)
        return gradient

class NumericalGradient(Gradient):
    """
    Approximate the gradient for each weight individually by sampling the error
    function slightly above and below the current value of the weight.
    """

    def __init__(self, network, cost, distance=1e-5):
        super().__init__(network, cost)
        # Offset added to and subtracted from a weight for sampling.
        self.distance = distance

    def __call__(self, weights, example):
        """
        Modify each weight individually in both directions to calculate a
        numeric gradient of the weights.
        """
        # We need a copy of the weights that we can modify to evaluate the cost
        # function on.
        modified = Matrices(weights.shapes, weights.flat.copy())
        gradient = Matrices(weights.shapes)
        for i, connection in enumerate(weights):
            for j, original in np.ndenumerate(connection):
                # Sample above and below and compute costs.
                modified[i][j] = original + self.distance
                above = self._evaluate(modified, example)
                modified[i][j] = original - self.distance
                below = self._evaluate(modified, example)
                # Restore the original value so we can reuse the weight matrix
                # for the next iteration.
                modified[i][j] = original
                # Compute the numeric gradient.
                sample = (above - below) / (2 * self.distance)
                gradient[i][j] = sample
        return gradient

    def _evaluate(self, weights, example):
        """Return the summed cost of the example under the given weights."""
        prediction = self.network.feed(weights, example.data)
        cost = self.cost(prediction, example.target)
        assert cost.shape == prediction.shape
        return cost.sum()

class CheckedBackprop(Gradient):
    """
    Computes the gradient both analytically trough backpropagation and
    numerically to validate the backpropagation implementation and derivatives
    of activation functions and cost functions. This is slow by its nature and
    it's recommended to validate derivatives on small networks.
    """

    def __init__(self, network, cost, distance=1e-5, tolerance=1e-8):
        self.tolerance = tolerance
        super().__init__(network, cost)
        self.analytic = Backprop(network, cost)
        self.numeric = NumericalGradient(network, cost, distance)

    def __call__(self, weights, example):
        analytic = self.analytic(weights, example)
        numeric = self.numeric(weights, example)
        # Report the largest absolute difference between both gradients.
        distances = np.absolute(analytic.flat - numeric.flat)
        worst = distances.max()
        if worst > self.tolerance:
            print('Gradient differs by {:.2f}%'.format(100 * worst))
        else:
            print('Gradient looks good')
        return analytic

class BatchBackprop:
    """
    Calculate the average gradient over a batch of examples.
    """

    def __init__(self, network, cost):
        self.backprop = Backprop(network, cost)

    def __call__(self, weights, examples):
        # Start from zeros of the same shapes and add up the gradient of
        # every example.
        gradient = Matrices(weights.shapes)
        for example in examples:
            gradient += self.backprop(weights, example)
        return gradient / len(examples)

class ParallelBackprop:
    """
    Alternative to BatchBackprop that yields the same results but utilizes
    multiprocessing to make use of more than one processor core.
    """

    def __init__(self, network, cost, workers=4):
        self.backprop = BatchBackprop(network, cost)
        self.workers = workers
        self.pool = multiprocessing.Pool(self.workers)

    def __call__(self, weights, examples):
        # Hand every worker at most one batch of roughly equal size.
        batch_size = int(math.ceil(len(examples) / self.workers))
        batches = list(batched(examples, batch_size))
        # Weight every batch by its share of the examples since the last
        # batch may be smaller than the others.
        sizes = [len(x) / batch_size for x in batches]
        sizes = [x / sum(sizes) for x in sizes]
        assert len(batches) <= self.workers
        # The shares are floats, so they add up to one only within rounding
        # error.
        assert math.isclose(sum(sizes), 1)
        compute = functools.partial(self.backprop, weights)
        gradients = self.pool.map(compute, batches)
        return sum(x * y for x, y in zip(gradients, sizes))


# pylint: disable=wrong-import-position
import collections
import time
import warnings
import inspect
import threading
import matplotlib

# Don't call the code if Sphinx inspects the file mocking external imports.
if inspect.ismodule(matplotlib):  # noqa
    # On Mac force backend that works with threading.
    if matplotlib.get_backend() == 'MacOSX':
        # NOTE(review): the statement selecting the backend was missing from
        # this file; TkAgg is assumed as the replacement -- confirm.
        matplotlib.use('TkAgg')
    # Hide matplotlib deprecation message. Newer matplotlib versions no
    # longer provide cbook.mplDeprecation, so fall back to the warning class
    # exported by the package itself.
    _deprecation = getattr(matplotlib.cbook, 'mplDeprecation', None)
    if _deprecation is None:
        _deprecation = getattr(
            matplotlib, 'MatplotlibDeprecationWarning', DeprecationWarning)
    warnings.filterwarnings('ignore', category=_deprecation)
    # Ensure available interactive backend.
    if matplotlib.get_backend() not in matplotlib.rcsetup.interactive_bk:
        print('No visual backend available. Maybe you are inside a virtualenv '
              'that was created without --system-site-packages.')

import matplotlib.pyplot as plt

class Interface:
    """
    Description of one plot: its title, axis labels and line style together
    with the data to show and the extent of the plotted data.
    """

    def __init__(self, title='', xlabel='', ylabel='', style=None):
        self._style = style or {}
        self._title = title
        self._xlabel = xlabel
        self._ylabel = ylabel
        self.xdata = []
        self.ydata = []
        self.width = 0
        self.height = 0

    # The accessors are properties because the window reads them as plain
    # attributes when it creates the axis and the line.

    @property
    def style(self):
        """Keyword arguments used for plotting the line."""
        return self._style

    @property
    def title(self):
        """Title of the plot."""
        return self._title

    @property
    def xlabel(self):
        """Label of the horizontal axis."""
        return self._xlabel

    @property
    def ylabel(self):
        """Label of the vertical axis."""
        return self._ylabel

class State:
    """
    Holder of the `running` flag that the window passes to the function it
    runs in the worker thread.
    """

    def __init__(self):
        # Nothing is running until the window has been started.
        self.running = False

class Window:
    """Figure that is refreshed on the main thread while a worker thread
    produces the data to display."""

    def __init__(self, refresh=0.5):
        """Create the figure.

        `refresh` is the target time in seconds between two redraws.
        """
        self.refresh = refresh
        self.thread = None
        self.state = State()
        self.figure = plt.figure()
        self.interfaces = []

    def register(self, position, interface):
        """Add a subplot at `position` showing the data of `interface`."""
        axis = self.figure.add_subplot(
            position, title=interface.title,
            xlabel=interface.xlabel, ylabel=interface.ylabel)
        line, = axis.plot(interface.xdata, interface.ydata, **interface.style)
        self.interfaces.append((axis, line, interface))

    def start(self, work):
        """
        Hand the main thread to the window and continue work in the provided
        function. A state is passed as the first argument that contains a
        `running` flag. The function is expected to exit if the flag becomes
        false. The flag can also be set to false to stop the window event loop
        and continue in the main thread after the `start()` call.
        """
        assert threading.current_thread() == threading.main_thread()
        assert not self.state.running
        self.state.running = True
        self.thread = threading.Thread(target=work, args=(self.state,))
        # The worker has to be started explicitly; creating the Thread
        # object alone does not run `work`.
        self.thread.start()
        while self.state.running:
            try:
                before = time.time()
                self.update()
                # Subtract the time spent redrawing so that iterations are
                # roughly `refresh` seconds apart.
                duration = time.time() - before
                plt.pause(max(0.001, self.refresh - duration))
            except KeyboardInterrupt:
                self.state.running = False

    def stop(self):
        """
        Close the window and stops the worker thread. The main thread will
        resume with the next command after the `start()` call.
        """
        assert threading.current_thread() == self.thread
        assert self.state.running
        self.state.running = False

    def update(self):
        """
        Redraw the figure to show changed data. This is automatically called
        after `start()` was run.
        """
        assert threading.current_thread() == threading.main_thread()
        for axis, line, interface in self.interfaces:
            # Push the current data into the line; without this the plot
            # keeps showing whatever was passed to `register()`.
            line.set_data(interface.xdata, interface.ydata)
            axis.set_xlim(0, interface.width or 1, emit=False)
            axis.set_ylim(0, interface.height or 1, emit=False)
        self.figure.canvas.draw_idle()

class Plot(Interface):
    """Interface that collects values passed to it and tracks plot limits.

    With `fixed` set to a positive integer only the most recent `fixed`
    values are kept; otherwise the data grows without bound.
    """

    def __init__(self, title, xlabel, ylabel, style=None, fixed=None):
        # pylint: disable=too-many-arguments, redefined-variable-type
        super().__init__(title, xlabel, ylabel, style or {})
        self.max_ = 0
        if not fixed:
            self.xdata = []
            self.ydata = []
        else:
            # Fixed-size window: x positions are constant and the deque
            # drops the oldest value once it is full.
            self.xdata = list(range(fixed))
            self.ydata = collections.deque([None] * fixed, maxlen=fixed)
            self.width = fixed

    def __call__(self, values):
        """Append `values` to the plotted data and update width and height."""
        values = list(values)
        self.ydata += values
        # Building a list keeps `max()` valid when `values` is empty.
        self.max_ = max([self.max_, *values])
        self.height = 1.05 * self.max_
        # Extend the x positions so there is one for every y value.
        while len(self.xdata) < len(self.ydata):
            self.xdata.append(len(self.xdata))
        self.width = len(self.xdata) - 1
        assert len(self.xdata) == len(self.ydata)


class GradientDecent:
    """
    Adapt the weights in the opposite direction of the gradient, scaled by
    the learning rate.
    """

    def __call__(self, weights, gradient, learning_rate=0.1):
        """Return `weights - learning_rate * gradient`."""
        return weights - learning_rate * gradient

class Momentum:
    """
    Slow down changes of direction in the gradient by aggregating previous
    values of the gradient and multiplying them in.
    """

    def __init__(self):
        # Result of the previous call; None until the first call.
        self.previous = None

    def __call__(self, gradient, rate=0.9):
        """Return a copy of `gradient` with `rate` times the previous result
        added. The first call returns the gradient unchanged."""
        gradient = gradient.copy()
        if self.previous is None:
            # Nothing to accumulate yet; just remember this gradient.
            self.previous = gradient.copy()
        else:
            assert self.previous.shape == gradient.shape
            gradient += rate * self.previous
            self.previous = gradient.copy()
        return gradient

class WeightDecay:
    """
    Slowly moves each weight closer to zero for regularization. This can help
    the model to find simpler solutions.
    """

    def __call__(self, weights, rate=1e-4):
        """Return the weights scaled by `1 - rate`."""
        return (1 - rate) * weights

class WeightTying:
    """
    Constraint groups of slices of the gradient to have the same value by
    averaging them. Should be applied to the initial weights and each gradient.
    """

    def __init__(self, *groups):
        """Store the groups after checking their structure.

        Each group is a non-empty sized collection of index tuples. Every
        tuple starts with an int and contains only ints and slices.
        """
        for group in groups:
            assert group and hasattr(group, '__len__')
            assert all(isinstance(x[0], int) for x in group)
            assert all(isinstance(y, (slice, int)) for x in group for y in x)
        self.groups = groups

    def __call__(self, matrices):
        """Return a copy of `matrices` in which all slices of a group are
        replaced by their element-wise average."""
        matrices = matrices.copy()
        for group in self.groups:
            slices = [matrices[slice_] for slice_ in group]
            assert all(x.shape == slices[0].shape for x in slices), (
                'All slices within a group must have the same shape. '
                'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.')
            average = sum(slices) / len(slices)
            assert average.shape == slices[0].shape
            for slice_ in group:
                matrices[slice_] = average
        return matrices



import functools
import numpy as np
from layered.gradient import BatchBackprop, CheckedBackprop
from layered.network import Network, Matrices
from layered.optimization import (
    GradientDecent, Momentum, WeightDecay, WeightTying)
from layered.utility import repeated, batched
from layered.evaluation import compute_costs, compute_error

class Trainer:
    """Train the network described by a problem definition.

    Calling the instance iterates over the training data in batches and
    updates the weights for each batch, with optional plotting, gradient
    checking and saving of the weights.
    """
    # pylint: disable=attribute-defined-outside-init, too-many-arguments

    def __init__(self, problem, load=None, save=None,
                 visual=False, check=False):
        """Store the options and build everything needed for training.

        problem: problem definition providing layers, cost, dataset and
            hyper parameters.
        load: optional path to load initial weights from.
        save: optional path to save the weights to at each evaluation.
        visual: whether to plot training cost and test error.
        check: whether to use gradient checking (one example per batch).
        """
        self.problem = problem
        self.load = load
        self.save = save
        self.visual = visual
        self.check = check
        # The attributes used by `__call__` and `_batch` are only created by
        # these initializers, so they have to run during construction.
        self._init_network()
        self._init_training()
        self._init_visualize()

    def _init_network(self):
        """Define model and initialize weights."""
        self.network = Network(self.problem.layers)
        self.weights = Matrices(self.network.shapes)
        if self.load:
            loaded = np.load(self.load)
            assert loaded.shape == self.weights.shape, (
                'weights to load must match problem definition')
            self.weights.flat = loaded
        else:
            # Draw as many random values as the loaded array would need to
            # have in the branch above.
            self.weights.flat = np.random.normal(
                self.problem.weight_mean, self.problem.weight_scale,
                self.weights.shape)

    def _init_training(self):
        # pylint: disable=redefined-variable-type
        """Classes needed during training."""
        if self.check:
            self.backprop = CheckedBackprop(self.network, self.problem.cost)
        else:
            self.backprop = BatchBackprop(self.network, self.problem.cost)
        self.momentum = Momentum()
        self.decent = GradientDecent()
        self.decay = WeightDecay()
        self.tying = WeightTying(*self.problem.weight_tying)
        self.weights = self.tying(self.weights)

    def _init_visualize(self):
        """Create the window and both plots when visualization is enabled."""
        if not self.visual:
            return
        # Imported lazily so that plotting is only required with `visual`.
        from layered.plot import Window, Plot
        self.plot_training = Plot(
            'Training', 'Examples', 'Cost', fixed=1000,
            style={'linestyle': '', 'marker': '.'})
        self.plot_testing = Plot('Testing', 'Time', 'Error')
        self.window = Window()
        self.window.register(211, self.plot_training)
        self.window.register(212, self.plot_testing)

    def __call__(self):
        """Train the model and visualize progress."""
        print('Start training')
        repeats = repeated(self.problem.dataset.training, self.problem.epochs)
        batches = batched(repeats, self.problem.batch_size)
        if self.visual:
            # The window keeps the main thread; training runs in its worker.
            self.window.start(functools.partial(self._train_visual, batches))
        else:
            self._train(batches)

    def _train(self, batches):
        """Process all batches; stop early on Ctrl-C."""
        for index, batch in enumerate(batches):
            try:
                self._batch(index, batch)
            except KeyboardInterrupt:
                print('\nAborted')
                return

    def _train_visual(self, batches, state):
        """Process batches until done or until `state.running` is cleared."""
        for index, batch in enumerate(batches):
            if not state.running:
                return
            self._batch(index, batch)
        input('Press any key to close window')
        state.running = False

    def _batch(self, index, batch):
        """Update the weights from one batch, then plot and evaluate."""
        if self.check:
            assert len(batch) == 1
            gradient = self.backprop(self.weights, batch[0])
        else:
            gradient = self.backprop(self.weights, batch)
        gradient = self.momentum(gradient, self.problem.momentum)
        gradient = self.tying(gradient)
        self.weights = self.decent(
            self.weights, gradient, self.problem.learning_rate)
        self.weights = self.decay(self.weights, self.problem.weight_decay)
        self._visualize(batch)
        self._evaluate(index)

    def _visualize(self, batch):
        """Add the costs of `batch` to the training plot if enabled."""
        if not self.visual:
            return
        costs = compute_costs(
            self.network, self.weights, self.problem.cost, batch)
        self.plot_training(costs)

    def _evaluate(self, index):
        """Save the weights and report the test error when it is due."""
        if not self._every(self.problem.evaluate_every,
                           self.problem.batch_size, index):
            return
        if self.save:
            np.save(self.save, self.weights)
        error = compute_error(
            self.network, self.weights, self.problem.dataset.testing)
        print('Batch {} test error {:.2f}%'.format(index, 100 * error))
        if self.visual:
            self.plot_testing([error])

    @staticmethod
    def _every(times, step_size, index):
        """
        Given a loop over batches of an iterable and an operation that should
        be performed every few elements. Determine whether the operation should
        be called for the current index.
        """
        current = index * step_size
        step = current // times * times
        reached = current >= step
        overshot = current >= step + step_size
        return bool(current and reached and not overshot)
import os
import argparse
from layered.problem import Problem
from layered.trainer import Trainer

def main(argv=None):
    """Parse the command line, build the trainer and run the training.

    argv: optional list of argument strings; when None the arguments are
        taken from `sys.argv` by argparse.
    """
    parser = argparse.ArgumentParser('layered')
    parser.add_argument(
        'problem',
        help='path to the YAML problem definition')
    parser.add_argument(
        '-v', '--visual', action='store_true',
        help='show a diagram of training costs')
    parser.add_argument(
        '-l', '--load', default=None,
        help='path to load the weights from at startup')
    parser.add_argument(
        '-s', '--save', default=None,
        help='path to dump the learned weights at each evaluation')
    parser.add_argument(
        '-c', '--check', action='store_true',
        help='whether to activate gradient checking')
    args = parser.parse_args(argv)

    print('Problem', os.path.split(args.problem)[1])
    problem = Problem(args.problem)
    trainer = Trainer(
        problem, args.load, args.save, args.visual, args.check)
    # Constructing the trainer does not train; calling it does.
    trainer()


if __name__ == '__main__':
    main()


  • 0
  • 2
    觉得还不错? 一键收藏
  • 0




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


