Feed forward neural networks and perceptrons
目录
一、知识基础
1.概念
前馈神经网络(包括感知机)中,信息只从输入层向输出层单向流动,没有反馈连接;通常用反向传播(BP)算法来训练,属于监督学习。
2.图解
二、模型demo
1.库的安装
查看notebook的python所在位置
# Print the version and the executable path of the Python interpreter that
# runs this notebook.
import sys
print(sys.version)
print(sys.executable)
! pip install muffnn==1.2.0
然后我们运行github上的setup.py文件来配置环境:
! python setup.py build develop
配置成功!(后来发现已经在一个文件夹里了hiahia—)
现在我们看看它的帮助使用的文档——
# Import the classifier.
from muffnn import MLPClassifier
# Load the labelled data (load_some_data is a placeholder, not defined here).
X, y = load_some_data()
# Build the model.
mlp = MLPClassifier()
# Fit the model to determine its parameters.
mlp.fit(X, y)
# Load new, unlabelled data and classify it (placeholder loader again).
X_new = load_some_unlabeled_data()
y_pred = mlp.predict(X_new)
muffnn 的模型支持用 pickle 序列化,所以如果想把训练好的模型(下面代码中的 est)保存到 .pkl 文件里,可以这样写:
import pickle
# Serialize `est` into est.pkl; the file is opened in binary write mode.
# NOTE(review): `est` is not defined in this snippet - presumably a fitted
# estimator such as `mlp` above; confirm.
with open('est.pkl', 'wb') as fp:
    pickle.dump(est, fp)
- pickle.dumps(obj):将对象 obj 序列化并返回一个 bytes 对象(上面代码用的 pickle.dump(obj, fp) 则是把序列化结果直接写入文件)
- pickle.loads(data):从 bytes 对象中反序列化出原来的对象(对应地,pickle.load(fp) 是从文件中读取)
2.学习封装的Layer函数
我也不知道前面在做些什么,也许正题才刚刚开始
example.py
import numpy as np
class Example:
    """
    Immutable record representing one example of a dataset.

    The input data and the target are both stored as float numpy arrays and
    are only exposed through read-only properties.
    """

    # Only these two attributes may exist on an instance.
    __slots__ = ('_data', '_target')

    def __init__(self, data, target):
        # np.array copies its argument and converts it to float.
        self._data = np.array(data, dtype=float)
        self._target = np.array(target, dtype=float)

    @property
    def data(self):
        """Input values of the example as a float array."""
        return self._data

    @property
    def target(self):
        """Expected output values of the example as a float array."""
        return self._target

    def __getstate__(self):
        # Pickle support: export both arrays under stable key names.
        return {'data': self.data, 'target': self.target}

    def __setstate__(self, state):
        # Pickle support: restore the arrays exported by __getstate__.
        self._data = state['data']
        self._target = state['target']

    def __repr__(self):
        def render(values):
            # Round every component to two decimals and join with spaces.
            return ' '.join(str(round(value, 2)) for value in values)

        return '({})->({})'.format(render(self.data), render(self.target))
草草的就学完了第一个py文件,等运用的时候我们再回来康康!
utility.py
import os
import errno
import functools
import itertools
def repeated(iterable, times):
    """
    Yield the elements of `iterable` again and again, `times` passes in total.

    The iterable is iterated anew on every pass, so a one-shot iterator only
    contributes elements during the first pass.
    """
    # The pass counter itself is not needed, hence the throwaway name.
    for _ in range(times):
        for element in iterable:
            yield element
def batched(iterable, size):
    """
    Split `iterable` into consecutive lists of `size` elements each.

    The last list may be shorter when the number of elements is not a
    multiple of `size`. Nothing is yielded for an empty iterable.
    """
    pending = []
    for item in iterable:
        pending.append(item)
        if len(pending) != size:
            continue
        # The batch is full: hand it out and start collecting a new one.
        yield pending
        pending = []
    # Hand out the incomplete remainder, if there is one.
    if pending:
        yield pending
def averaged(callable_, batch):
    """
    Return the mean of `callable_(element)` over all elements of `batch`.

    The results are added up with `+` and divided by `len(batch)`, so they
    may be plain numbers or anything else supporting these operations, for
    example numpy arrays.

    Args:
        callable_: function applied to every element of the batch.
        batch: sized collection of elements.

    Returns:
        The sum of all results divided by the number of elements.

    Raises:
        ValueError: if `batch` is empty.
    """
    if len(batch) == 0:
        raise ValueError('cannot average over an empty batch')
    overall = None
    for element in batch:
        current = callable_(element)
        # Compare against None explicitly: the truth value of a numpy array
        # with more than one element is ambiguous and raises.
        overall = current if overall is None else overall + current
    return overall / len(batch)
def listify(fn=None, wrapper=list):
    """
    Decorator that passes the return value of a function through `wrapper`.

    Typically used on generator functions so that callers receive a list (or
    whatever `wrapper` builds) instead of a generator. Can be applied both
    as `@listify` and as `@listify(wrapper=tuple)`.

    From http://stackoverflow.com/a/12377059/1079110
    """
    def decorate(function):
        # functools.wraps copies name and docstring onto the replacement.
        @functools.wraps(function)
        def collected(*args, **kwargs):
            return wrapper(function(*args, **kwargs))
        return collected

    # Without a function we were called with arguments only and must return
    # the actual decorator; otherwise decorate the function right away.
    return decorate if fn is None else decorate(fn)
def ensure_folder(path):
    """
    Make sure that the directory `path` exists, creating parents as needed.

    An already existing directory is not an error.

    Raises:
        FileExistsError: if `path` exists but is not a directory.
        OSError: if the directory cannot be created for another reason.
    """
    # exist_ok only tolerates an existing *directory*; an existing regular
    # file at `path` is still reported instead of being silently accepted.
    os.makedirs(path, exist_ok=True)
def hstack_lines(blocks, sep=' '):
    """
    Lay out multi-line strings side by side and return the combined text.

    Every block becomes one column. Each line of a column is right-aligned
    to the width of the longest line of that column, and columns with fewer
    lines are padded with blank cells. Every cell is followed by `sep` and
    every output row ends with a newline.
    """
    columns = [block.split('\n') for block in blocks]
    height = max(len(column) for column in columns)
    widths = [max(len(line) for line in column) for column in columns]
    rows = []
    for row in range(height):
        cells = []
        for column, width in zip(columns, widths):
            # Columns that ran out of lines contribute an empty cell.
            text = column[row] if row < len(column) else ''
            cells.append(text.rjust(width, ' ') + sep)
        rows.append(''.join(cells) + '\n')
    return ''.join(rows)
def pairwise(iterable):
    """
    Return an iterator over overlapping pairs of neighbouring elements.

    For the elements a, b, c this produces (a, b) and (b, c).
    """
    # tee provides two independent iterators over the same elements.
    leading, trailing = itertools.tee(iterable)
    # Move the second iterator one element ahead; the default avoids a
    # StopIteration for an empty iterable.
    next(trailing, None)
    # zip stops as soon as the advanced iterator is exhausted.
    return zip(leading, trailing)
呕,快吐了,这就是底层代码带来的杀伤力
K.O…
dataset.py
这个模块负责下载或生成数据集,并把数据整理成由 Example 组成的训练集和测试集。
import array
import os
import shutil
import struct
import gzip
from urllib.request import urlopen
import numpy as np
from layered.example import Example
from layered.utility import ensure_folder
class Dataset:
    """
    Base class for datasets consisting of training and testing examples.

    Sub classes list the files to download in `urls` and implement `parse()`.
    When `cache` is true the parsed examples are stored as .npy files inside
    the dataset folder and loaded from there on later instantiations.
    """

    # URLs of the files that are downloaded and handed to parse().
    urls = []
    # Whether parsed examples are stored on disk and reused.
    cache = True

    def __init__(self):
        cache = type(self).cache
        if cache and self._is_cached():
            print('Load cached dataset')
            self.load()
        else:
            filenames = [self.download(x) for x in type(self).urls]
            self.training, self.testing = self.parse(*filenames)
            if cache:
                self.dump()

    @classmethod
    def folder(cls):
        """
        Return the folder for files of this dataset, creating it if needed.

        The folder is located below `~/.layered/dataset` and named after the
        lower-cased class name.
        """
        name = cls.__name__.lower()
        home = os.path.expanduser('~')
        folder = os.path.join(home, '.layered/dataset', name)
        ensure_folder(folder)
        return folder

    def parse(self):
        """
        Subclass responsibility. The filenames of downloaded files will be
        passed as individual parameters to this function. Therefore, it must
        accept as many parameters as provided class-site urls. Should return a
        tuple of training examples and testing examples.
        """
        raise NotImplementedError

    def dump(self):
        """Store the training and testing examples as .npy files."""
        np.save(self._training_path(), self.training)
        np.save(self._testing_path(), self.testing)

    def load(self):
        """Load the examples previously stored by `dump()`."""
        # The examples are Python objects, so numpy stores them pickled.
        # Reading them back needs allow_pickle=True, which numpy disables by
        # default. Unpickling can execute arbitrary code, so the cache folder
        # must only contain files written by dump().
        self.training = np.load(self._training_path(), allow_pickle=True)
        self.testing = np.load(self._testing_path(), allow_pickle=True)

    def download(self, url):
        """
        Download `url` into the dataset folder and return the local filename.
        """
        _, filename = os.path.split(url)
        filename = os.path.join(self.folder(), filename)
        print('Download', filename)
        with urlopen(url) as response, open(filename, 'wb') as file_:
            shutil.copyfileobj(response, file_)
        return filename

    @staticmethod
    def split(examples, ratio=0.8):
        """
        Utility function that can be used within the parse() implementation of
        sub classes to split a list of example into two lists for training and
        testing.
        """
        split = int(ratio * len(examples))
        return examples[:split], examples[split:]

    def _is_cached(self):
        # Both cache files must exist for the cache to be usable.
        if not os.path.exists(self._training_path()):
            return False
        if not os.path.exists(self._testing_path()):
            return False
        return True

    def _training_path(self):
        return os.path.join(self.folder(), 'training.npy')

    def _testing_path(self):
        return os.path.join(self.folder(), 'testing.npy')
class Test(Dataset):
    """
    Tiny uncached dataset of identical examples, intended for testing.

    Every example maps the input [1, 2, 3] to the target [1, 2, 3].
    """

    cache = False

    def __init__(self, amount=10):
        # Must be set before the base constructor runs because it calls
        # parse(), which reads the amount.
        self.amount = amount
        super().__init__()

    def parse(self):
        examples = []
        for _ in range(self.amount):
            examples.append(Example([1, 2, 3], [1, 2, 3]))
        return self.split(examples)
class Regression(Dataset):
    """
    Synthetically generated dataset for regression. The task is to predict the
    sum and product of all the input values. All values are normalized between
    zero and one.
    """

    cache = False

    def __init__(self, amount=10000, inputs=10):
        # Both values are read by parse(), which the base constructor calls.
        self.amount = amount
        self.inputs = inputs
        super().__init__()

    def parse(self):
        # One row of uniformly distributed inputs per example.
        inputs = np.random.rand(self.amount, self.inputs)
        # Scale both targets by their largest value.
        products = np.prod(inputs, axis=1)
        products = products / np.max(products)
        sums = np.sum(inputs, axis=1)
        sums = sums / np.max(sums)
        # The target of every example is the pair (sum, product).
        targets = np.column_stack([sums, products])
        examples = [
            Example(row, target) for row, target in zip(inputs, targets)]
        return self.split(examples)
class Modulo(Dataset):
    """
    Synthetically generated classification dataset. The task is to predict
    the modulo classes of random integers encoded as bit arrays of length 32.
    """

    cache = False

    def __init__(self, amount=60000, inputs=32, classes=7):
        # All three values are read by parse(), which the base constructor
        # calls.
        self.amount = amount
        self.inputs = inputs
        self.classes = classes
        super().__init__()

    def parse(self):
        # NOTE(review): the upper bound is inputs ** 2 - 1 (1023 for 32
        # inputs), so only the lowest bits ever vary. This looks like a typo
        # for 2 ** inputs - confirm before changing, it alters the dataset.
        numbers = np.random.randint(0, self.inputs ** 2 - 1, self.amount)
        remainders = np.mod(numbers, self.classes)
        # One-hot encode the remainder class of every number.
        targets = np.zeros((self.amount, self.classes))
        targets[np.arange(self.amount), remainders] = 1
        # Encode every number as booleans, least significant bit first.
        masks = 1 << np.arange(self.inputs)
        bits = (numbers[:, None] & masks) > 0
        examples = [Example(x, y) for x, y in zip(bits, targets)]
        return self.split(examples)
class Mnist(Dataset):
    """
    The MNIST database of handwritten digits, available from this page, has a
    training set of 60,000 examples, and a test set of 10,000 examples. It is a
    subset of a larger set available from NIST. The digits have been
    size-normalized and centered in a fixed-size image. It is a good database
    for people who want to try learning techniques and pattern recognition
    methods on real-world data while spending minimal efforts on preprocessing
    and formatting. (from http://yann.lecun.com/exdb/mnist/)
    """

    urls = [
        'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
        'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
    ]

    def parse(self, train_x, train_y, test_x, test_y):
        """
        Read the four downloaded files, given in the order of `urls`, and
        return the lists of training and testing examples.
        """
        # pylint: disable=arguments-differ
        training = list(self.read(train_x, train_y))
        testing = list(self.read(test_x, test_y))
        return training, testing

    @staticmethod
    def read(data, labels):
        """
        Yield one Example per image of a gzipped image and label file pair.

        Args:
            data: filename of the gzipped image file.
            labels: filename of the gzipped label file.

        Yields:
            Examples whose data are the pixels scaled by 1/255 and whose
            target is a one-hot vector of length 10.
        """
        # Context managers close the files even if parsing a header fails.
        with gzip.open(data, 'rb') as images:
            # Header: four big-endian unsigned ints, of which the last three
            # are used as image count, rows and columns.
            _, size, rows, cols = struct.unpack('>IIII', images.read(16))
            image_bin = array.array('B', images.read())
        with gzip.open(labels, 'rb') as label_file:
            # Header: two big-endian unsigned ints, the second is the count.
            _, size2 = struct.unpack('>II', label_file.read(8))
            assert size == size2
            label_bin = array.array('B', label_file.read())
        pixels = rows * cols
        for i in range(size):
            image = image_bin[i * pixels:(i + 1) * pixels]
            image = np.array(image).reshape(pixels) / 255
            target = np.zeros(10)
            target[label_bin[i]] = 1
            yield Example(image, target)
cost.py
计算代价函数
import numpy as np
class Cost:
    """
    Interface of cost functions.

    Sub classes implement the cost itself in `__call__` and its derivative
    with respect to the prediction in `delta`.
    """

    def __call__(self, prediction, target):
        """Return the cost of `prediction` given `target`."""
        raise NotImplementedError

    def delta(self, prediction, target):
        """Return the derivative of the cost with respect to `prediction`."""
        raise NotImplementedError
class SquaredError(Cost):
    """
    Fast and simple cost function: half of the squared difference.
    """

    def __call__(self, prediction, target):
        difference = prediction - target
        return difference ** 2 / 2

    def delta(self, prediction, target):
        # The derivative of (p - t) ** 2 / 2 with respect to p.
        difference = prediction - target
        return difference
class CrossEntropy(Cost):
    """
    Logistic cost function used for classification tasks. Learns faster in the
    beginning than SquaredError because large errors are penalized
    exponentially. This makes sense in classification since only the best class
    will be the predicted one.
    """

    def __init__(self, epsilon=1e-11):
        # Small constant keeping logarithms and divisions away from zero.
        self.epsilon = epsilon

    def __call__(self, prediction, target):
        # Clip so that neither log(p) nor log(1 - p) sees a zero.
        lower, upper = self.epsilon, 1 - self.epsilon
        safe = np.clip(prediction, lower, upper)
        positive = target * np.log(safe)
        negative = (1 - target) * np.log(1 - safe)
        return -(positive + negative)

    def delta(self, prediction, target):
        # p - p ** 2 equals p * (1 - p); bounded from below by epsilon to
        # avoid dividing by zero.
        variance = prediction - prediction ** 2
        denominator = np.maximum(variance, self.epsilon)
        delta = (prediction - target) / denominator
        assert delta.shape == target.shape == prediction.shape
        return delta
activation.py
定义各种激活函数
import numpy as np
class Activation:
    """
    Interface of activation functions.

    `__call__` implements the forward pass and `delta` the backward pass.
    """

    def __call__(self, incoming):
        """Return the outgoing activation for the incoming activation."""
        raise NotImplementedError

    def delta(self, incoming, outgoing, above):
        """
        Compute the derivative of the cost with respect to the input of this
        activation function. Outgoing is what this function returned in the
        forward pass and above is the derivative of the cost with respect to
        the outgoing activation.
        """
        raise NotImplementedError
class Identity(Activation):
    """Activation that passes its input through unchanged."""

    def __call__(self, incoming):
        return incoming

    def delta(self, incoming, outgoing, above):
        # The local derivative is one everywhere.
        ones = np.ones(incoming.shape).astype(float)
        return ones * above
class Sigmoid(Activation):
    """Logistic activation 1 / (1 + exp(-x))."""

    def __call__(self, incoming):
        return 1 / (1 + np.exp(-incoming))

    def delta(self, incoming, outgoing, above):
        # The derivative is expressed through the forward result.
        slope = outgoing * (1 - outgoing)
        return slope * above
class Relu(Activation):
    """Rectified linear activation max(x, 0)."""

    def __call__(self, incoming):
        return np.maximum(incoming, 0)

    def delta(self, incoming, outgoing, above):
        # The gradient only passes where the input was strictly positive.
        passing = np.greater(incoming, 0).astype(float)
        return passing * above
class Softmax(Activation):
    """Softmax activation, normalizing over all incoming values."""

    def __call__(self, incoming):
        # Subtracting the maximum doesn't change the result but prevents
        # overflows in the exponential.
        shifted = incoming - np.max(incoming)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum()

    def delta(self, incoming, outgoing, above):
        weighted = outgoing * above
        # Sum along the last axis and keep it for broadcasting.
        last_axis = weighted.ndim - 1
        total = weighted.sum(axis=last_axis, keepdims=True)
        weighted -= outgoing * total
        return weighted
class SparseField(Activation):
    """
    Sparse binary activation on a square field of neurons.

    The strongest positive neuron is activated repeatedly, and each time all
    neurons within a square neighbourhood around it are suppressed, until no
    positive neuron is left.
    """

    def __init__(self, inhibition=0.05, leaking=0.0):
        # Determines the neighbourhood radius relative to the layer size.
        self.inhibition = inhibition
        # Factor of the incoming activation used as lower bound of the output.
        self.leaking = leaking

    def __call__(self, incoming):
        count = len(incoming)
        side = int(np.sqrt(count))
        assert side ** 2 == count, 'layer size must be a square'
        # Work on a copy because winners and neighbours get zeroed out.
        remaining = incoming.copy().reshape((side, side))
        radius = int(np.sqrt(self.inhibition * count)) // 2
        assert radius, 'no inhibition due to small factor'
        active = np.zeros(remaining.shape)
        while True:
            row, col = np.unravel_index(remaining.argmax(), remaining.shape)
            # Stop once the strongest remaining neuron is not positive.
            if remaining[row, col] <= 0:
                break
            active[row, col] = 1
            # Neighbourhood of the winner, clipped at the field borders.
            rows = slice(max(row - radius, 0), min(row + radius + 1, side))
            cols = slice(max(col - radius, 0), min(col + radius + 1, side))
            remaining[rows, cols] = 0
            assert remaining[row, col] == 0
        flat = active.reshape(count)
        return np.maximum(flat, self.leaking * incoming)

    def delta(self, incoming, outgoing, above):
        # The gradient only passes through neurons with positive output.
        passing = np.greater(outgoing, 0).astype(float)
        return passing * above
class SparseRange(Activation):
    """
    E%-Max Winner-Take-All.
    Binary activation. First, the activation function is applied. Then all
    neurons within the specified range below the strongest neuron are set to
    one. All others are set to zero. The gradient is the one of the activation
    function for active neurons and zero otherwise.
    See: A Second Function of Gamma Frequency Oscillations: An E%-Max
    Winner-Take-All Mechanism Selects Which Cells Fire. (2009)
    """

    def __init__(self, range_=0.3, function=Sigmoid()):
        assert 0 < range_ < 1
        self._range = range_
        self._function = function

    def __call__(self, incoming):
        activated = self._function(incoming)
        # Neurons at or above the threshold win and are set to one.
        winners = activated >= self._threshold(activated)
        outgoing = np.zeros(activated.shape)
        outgoing[winners] = 1
        return outgoing

    def delta(self, incoming, outgoing, above):
        # NOTE(review): `outgoing` is the binary result of __call__, not the
        # output of the wrapped function; with the default Sigmoid, whose
        # delta is outgoing * (1 - outgoing), this appears to be zero
        # everywhere - confirm whether that is intended.
        inner = self._function.delta(incoming, outgoing, above)
        return outgoing * inner

    def _threshold(self, incoming):
        # The threshold lies `range` of the value span below the maximum.
        lowest, highest = incoming.min(), incoming.max()
        span = highest - lowest
        return lowest + span * (1 - self._range)
network.py
网络的一些结构具体化,feed啥的
import operator
import numpy as np
class Layer:
    """
    A layer of neurons with one activation function.

    Remembers the incoming and outgoing activations of the latest forward
    pass because they are needed to compute derivatives afterwards.
    """

    def __init__(self, size, activation):
        assert size and isinstance(size, int)
        self.size = size
        # `activation` is a class or factory; the layer owns an instance.
        self.activation = activation()
        self.incoming = np.zeros(size)
        self.outgoing = np.zeros(size)
        assert len(self.incoming) == len(self.outgoing) == self.size

    def __len__(self):
        assert len(self.incoming) == len(self.outgoing)
        return len(self.incoming)

    def __repr__(self):
        return repr(self.outgoing)

    def __str__(self):
        # One line per neuron showing its incoming and outgoing activation.
        lines = []
        for pair in zip(self.incoming, self.outgoing):
            cells = ('{: >6.3f}'.format(value) for value in pair)
            lines.append(' /'.join(cells))
        return '\n'.join(lines)

    def apply(self, incoming):
        """
        Store the incoming activation, apply the activation function and store
        the result as outgoing activation.
        """
        assert len(incoming) == self.size
        self.incoming = incoming
        result = self.activation(self.incoming)
        assert len(result) == self.size
        self.outgoing = result

    def delta(self, above):
        """
        The derivative of the activation function at the current state.
        """
        return self.activation.delta(self.incoming, self.outgoing, above)
class Matrices:
    """
    A list of two-dimensional matrices stored in one flat array.

    `shapes` holds the (rows, columns) pair of every matrix. Indexing with
    an integer returns the corresponding matrix, reshaped from its part of
    the flat array; a tuple additionally indexes into that matrix and a
    slice returns a list of matrices. Arithmetic operators work on the flat
    array and return new Matrices. Attributes not found on the object are
    looked up on the flat array.
    """

    def __init__(self, shapes, elements=None):
        self.shapes = shapes
        length = sum(x * y for x, y in shapes)
        if elements is not None:
            assert len(elements) == length
            # Copy so that the passed array is never modified through us.
            elements = elements.copy()
        else:
            elements = np.zeros(length)
        self.flat = elements

    def __len__(self):
        # The number of matrices, not the number of elements.
        return len(self.shapes)

    def __getitem__(self, index):
        if hasattr(index, '__len__'):
            # Tuple index: the first entry selects the matrix and the
            # remaining entries index into it.
            assert isinstance(index[0], int)
            return self[index[0]][index[1:]]
        if isinstance(index, slice):
            return [self[i] for i in self._range_from_slice(index)]
        slice_ = self._locate(index)
        data = self.flat[slice_]
        data = data.reshape(self.shapes[index])
        return data

    def __setitem__(self, index, data):
        if hasattr(index, '__len__'):
            assert isinstance(index[0], int)
            # Assign into the reshaped part of the flat array.
            self[index[0]][index[1:]] = data
            return
        if isinstance(index, slice):
            for i in self._range_from_slice(index):
                self[i] = data
            return
        slice_ = self._locate(index)
        data = data.reshape(slice_.stop - slice_.start)
        self.flat[slice_] = data

    def __getattr__(self, name):
        # Tunnel not found properties to the underlying array.
        flat = super().__getattribute__('flat')
        return getattr(flat, name)

    def __setattr__(self, name, value):
        # Ensure that the size of the underlying array doesn't change. The
        # very first assignment, made by the constructor, has nothing to
        # compare against and is accepted as is.
        if name == 'flat' and 'flat' in self.__dict__:
            assert np.shape(value) == self.__dict__['flat'].shape
        super().__setattr__(name, value)

    def copy(self):
        """Return independent Matrices with the same shapes and values."""
        return Matrices(self.shapes, self.flat.copy())

    def __add__(self, other):
        return self._operation(other, lambda x, y: x + y)

    def __sub__(self, other):
        return self._operation(other, lambda x, y: x - y)

    def __mul__(self, other):
        return self._operation(other, lambda x, y: x * y)

    def __truediv__(self, other):
        return self._operation(other, lambda x, y: x / y)

    __rmul__ = __mul__
    __radd__ = __add__

    def _operation(self, other, operation):
        # Operate on the flat array of the other operand if it has one and
        # on the operand itself otherwise, for example for plain numbers.
        try:
            other = other.flat
        except AttributeError:
            pass
        return Matrices(self.shapes, operation(self.flat, other))

    def _locate(self, index):
        """Return the slice of the flat array holding matrix `index`."""
        assert isinstance(index, int), (
            'Only single elemente can be indiced in the first dimension.')
        if index < 0:
            index = len(self.shapes) + index
        if not 0 <= index < len(self.shapes):
            raise IndexError
        offset = sum(x * y for x, y in self.shapes[:index])
        length = operator.mul(*self.shapes[index])
        return slice(offset, offset + length)

    def _range_from_slice(self, slice_):
        # slice.indices resolves missing, negative and out of range bounds
        # like list slicing does. In particular a stop of zero yields an
        # empty range rather than being mistaken for a missing stop.
        return range(*slice_.indices(len(self.shapes)))

    def __str__(self):
        return str(len(self.flat)) + str(self.flat)
class Network:
    """
    Feed forward network made of consecutive layers.

    The network stores no weights; they are passed to `feed()` as one weight
    matrix per pair of neighbouring layers.
    """

    def __init__(self, layers):
        self.layers = layers
        self.sizes = tuple(layer.size for layer in self.layers)
        # Weight matrices have the dimensions of the two layers around them.
        # Also, there is an additional bias input to each weight matrix.
        neighbours = zip(self.sizes[:-1], self.sizes[1:])
        self.shapes = [(inputs + 1, outputs) for inputs, outputs in neighbours]
        # Weight matrices are in between the layers.
        assert len(self.shapes) == len(self.layers) - 1

    def feed(self, weights, data):
        """
        Evaluate the network with alternative weights on the input data and
        return the output activation.
        """
        first = self.layers[0]
        assert len(data) == first.size
        first.apply(data)
        # Propagate through the remaining layers.
        steps = zip(self.layers[:-1], weights, self.layers[1:])
        for source, weight, destination in steps:
            destination.apply(self.forward(weight, source.outgoing))
        # Return the activations of the output layer.
        return self.layers[-1].outgoing

    @staticmethod
    def forward(weight, activations):
        """Multiply activations, extended by a bias input, with a weight."""
        # Add bias input of one.
        with_bias = np.insert(activations, 0, 1)
        assert with_bias[0] == 1
        return with_bias.dot(weight)

    @staticmethod
    def backward(weight, activations):
        """Propagate values backwards through a weight matrix."""
        propagated = activations.dot(weight.transpose())
        # Don't expose the bias input of one.
        return propagated[1:]
problem.py
import os
import yaml
import layered.cost
import layered.dataset
import layered.activation
from layered.network import Layer
class Problem:
    """
    Definition of a learning problem, loaded from YAML.

    The properties listed in `_defaults()` become attributes of the problem.
    Cost and dataset names are replaced by instances of the classes of that
    name, and layer definitions are replaced by Layer objects.
    """

    def __init__(self, content=None):
        """
        Construct a problem. If content is specified, try to load it as a YAML
        path and otherwise treat it as an inline YAML string. Without content
        the defaults are loaded.
        """
        if content and os.path.isfile(content):
            with open(content) as file_:
                self.parse(file_)
        else:
            # An empty definition loads all defaults. Skipping this step
            # would leave the attributes read by _validate() undefined.
            self.parse(content or '')
        self._validate()

    def __str__(self):
        keys = self.__dict__.keys() & self._defaults().keys()
        return str({x: getattr(self, x) for x in keys})

    def parse(self, definition):
        """
        Load the problem from a YAML string or an open YAML file.

        Raises an AssertionError if the definition contains properties that
        are not known.
        """
        # safe_load only constructs plain data types, so a problem file can't
        # execute code. Calling yaml.load without a Loader is unsafe and is
        # rejected by recent PyYAML versions.
        definition = yaml.safe_load(definition)
        self._load_definition(definition)
        self._load_symbols()
        self._load_layers()
        self._load_weight_tying()
        # Known properties were popped, so anything left is unknown.
        assert not definition, (
            'unknown properties {} in problem definition'
            .format(', '.join(definition.keys())))

    def _load_definition(self, definition):
        # The empty dictionary causes defaults to be loaded even if the
        # definition is None.
        if not definition:
            definition = {}
        for name, default in self._defaults().items():
            # Convert every value to the type of its default.
            type_ = type(default)
            self.__dict__[name] = type_(definition.pop(name, default))

    def _load_symbols(self):
        # Replace the cost and dataset names by instances of these classes.
        # pylint: disable=attribute-defined-outside-init
        self.cost = self._find_symbol(layered.cost, self.cost)()
        self.dataset = self._find_symbol(layered.dataset, self.dataset)()

    def _load_layers(self):
        # Replace every layer definition by a Layer object.
        for index, layer in enumerate(self.layers):
            size, activation = layer.pop('size'), layer.pop('activation')
            activation = self._find_symbol(layered.activation, activation)
            self.layers[index] = Layer(size, activation)

    def _load_weight_tying(self):
        # Every entry is a string of comma separated slices such as '0,1:3'.
        # pylint: disable=attribute-defined-outside-init
        self.weight_tying = [[y.split(',') for y in x]
                             for x in self.weight_tying]
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                for k, slice_ in enumerate(slices):
                    slice_ = [int(s) if s else None for s in slice_.split(':')]
                    self.weight_tying[i][j][k] = slice(*slice_)
        for i, group in enumerate(self.weight_tying):
            for j, slices in enumerate(group):
                assert not slices[0].start and not slices[0].step, (
                    'Ranges are not allowed in the first dimension.')
                # A single number was parsed into the stop of a slice; the
                # first dimension is stored as that plain number.
                self.weight_tying[i][j][0] = slices[0].stop

    def _find_symbol(self, module, name, fallback=None):
        """
        Find the symbol of the specified name inside the module or raise an
        exception.
        """
        if not hasattr(module, name) and fallback:
            return self._find_symbol(module, fallback, None)
        return getattr(module, name)

    def _validate(self):
        # The outer layers must fit the examples of the dataset.
        num_input = len(self.dataset.training[0].data)
        num_output = len(self.dataset.training[0].target)
        if self.layers:
            assert self.layers[0].size == num_input, (
                'the size of the input layer must match the training data')
            assert self.layers[-1].size == num_output, (
                'the size of the output layer must match the training labels')

    @staticmethod
    def _defaults():
        # All known properties of a problem and their default values.
        return {
            'cost': 'SquaredError',
            'dataset': 'Modulo',
            'layers': [],
            'epochs': 1,
            'batch_size': 1,
            'learning_rate': 0.1,
            'momentum': 0.0,
            'weight_scale': 0.1,
            'weight_mean': 0.0,
            'weight_decay': 0.0,
            'weight_tying': [],
            'evaluate_every': 1000,
        }
evaluation.py
对预测结果进行评价
import numpy as np
def compute_costs(network, weights, cost, examples):
    """
    Return the mean cost of the network's prediction for every example.

    All examples are fed through the network first; afterwards the cost of
    each prediction against its target is averaged over the outputs.
    """
    predictions = []
    for example in examples:
        predictions.append(network.feed(weights, example.data))
    costs = []
    for prediction, example in zip(predictions, examples):
        costs.append(cost(prediction, example.target).mean())
    return costs
def compute_error(network, weights, examples):
    """
    Return the fraction of examples that the network classifies wrongly.

    An example counts as wrong when the position of the largest output
    differs from the position of the largest target value.
    """
    predictions = [network.feed(weights, example.data) for example in examples]
    mistakes = sum(
        bool(np.argmax(prediction) != np.argmax(example.target))
        for prediction, example in zip(predictions, examples))
    return mistakes / len(examples)
gradient.py
计算梯度
import math
import functools
import multiprocessing
import numpy as np
from layered.network import Matrices
from layered.utility import batched
class Gradient:
    """
    Interface of gradient computations for a network and a cost function.

    Sub classes compute the gradient for a single example in `__call__`.
    """

    def __init__(self, network, cost):
        self.network, self.cost = network, cost

    def __call__(self, weights, example):
        """Return the gradient for the weights and the example."""
        raise NotImplementedError
class Backprop(Gradient):
    """
    Use the backpropagation algorithm to efficiently determine the gradient of
    the cost function with respect to each individual weight.
    """

    def __call__(self, weights, example):
        # The forward pass stores the activations inside the layers.
        prediction = self.network.feed(weights, example.data)
        output_delta = self._delta_output(prediction, example.target)
        layer_deltas = self._delta_layers(weights, output_delta)
        return self._delta_weights(layer_deltas)

    def _delta_output(self, prediction, target):
        output_layer = self.network.layers[-1]
        assert len(target) == output_layer.size
        # The derivative with respect to the output layer is computed as the
        # product of error derivative and local derivative at the layer.
        delta_cost = self.cost.delta(prediction, target)
        delta_output = output_layer.delta(delta_cost)
        assert len(delta_cost) == len(delta_output) == len(target)
        return delta_output

    def _delta_layers(self, weights, delta_output):
        # Propagate backwards through the hidden layers but not the input
        # layer. The current weight matrix is the one to the right of the
        # current layer.
        hidden = list(zip(weights[1:], self.network.layers[1:-1]))
        assert all(x.shape[0] - 1 == len(y) for x, y in hidden)
        # Collected from the output towards the input.
        deltas = [delta_output]
        above = delta_output
        for weight, layer in reversed(hidden):
            above = self._delta_layer(layer, weight, above)
            deltas.append(above)
        # Hand out the deltas in the order of the layers.
        return reversed(deltas)

    def _delta_layer(self, layer, weight, above):
        # The gradient at a layer is computed as the derivative of both the
        # local activation and the weighted sum of the derivatives in the
        # deeper layer.
        backward = self.network.backward(weight, above)
        delta = layer.delta(backward)
        assert len(layer) == len(backward) == len(delta)
        return delta

    def _delta_weights(self, delta_layers):
        # The gradient with respect to the weights is computed as the gradient
        # at the target neuron multiplied by the activation of the source
        # neuron.
        gradient = Matrices(self.network.shapes)
        sources = self.network.layers[:-1]
        for index, (source, delta) in enumerate(zip(sources, delta_layers)):
            # We want to tweak the bias weights so we need them in the
            # gradient.
            activations = np.insert(source.outgoing, 0, 1)
            assert activations[0] == 1
            gradient[index] = np.outer(activations, delta)
        return gradient
class NumericalGradient(Gradient):
    """
    Approximate the gradient for each weight individually by sampling the error
    function slightly above and below the current value of the weight.
    """

    def __init__(self, network, cost, distance=1e-5):
        super().__init__(network, cost)
        # How far a weight is moved in each direction for sampling.
        self.distance = distance

    def __call__(self, weights, example):
        """
        Modify each weight individually in both directions to calculate a
        numeric gradient of the weights.
        """
        # We need a copy of the weights that we can modify to evaluate the cost
        # function on.
        probe = Matrices(weights.shapes, weights.flat.copy())
        gradient = Matrices(weights.shapes)
        step = self.distance
        for i, connection in enumerate(weights):
            for j, original in np.ndenumerate(connection):
                # Sample above and below and compute costs.
                probe[i][j] = original + step
                above = self._evaluate(probe, example)
                probe[i][j] = original - step
                below = self._evaluate(probe, example)
                # Restore the original value so we can reuse the weight matrix
                # for the next iteration.
                probe[i][j] = original
                # Compute the numeric gradient.
                gradient[i][j] = (above - below) / (2 * step)
        return gradient

    def _evaluate(self, weights, example):
        # Total cost of the example under the given weights.
        prediction = self.network.feed(weights, example.data)
        cost = self.cost(prediction, example.target)
        assert cost.shape == prediction.shape
        return cost.sum()
class CheckedBackprop(Gradient):
    """
    Computes the gradient both analytically through backpropagation and
    numerically to validate the backpropagation implementation and derivatives
    of activation functions and cost functions. This is slow by its nature and
    it's recommended to validate derivatives on small networks.
    """

    def __init__(self, network, cost, distance=1e-5, tolerance=1e-8):
        super().__init__(network, cost)
        # Largest accepted absolute difference between the two gradients.
        self.tolerance = tolerance
        self.analytic = Backprop(network, cost)
        self.numeric = NumericalGradient(network, cost, distance)

    def __call__(self, weights, example):
        analytic = self.analytic(weights, example)
        numeric = self.numeric(weights, example)
        # Judge by the largest absolute difference of any single weight.
        worst = np.absolute(analytic.flat - numeric.flat).max()
        if worst <= self.tolerance:
            print('Gradient looks good')
        else:
            print('Gradient differs by {:.2f}%'.format(100 * worst))
        # The analytic gradient is returned either way.
        return analytic
class BatchBackprop:
    """
    Calculate the average gradient over a batch of examples.
    """

    def __init__(self, network, cost):
        self.backprop = Backprop(network, cost)

    def __call__(self, weights, examples):
        # Add up the gradients of all examples, starting from zeros.
        per_example = (self.backprop(weights, example) for example in examples)
        total = sum(per_example, Matrices(weights.shapes))
        return total / len(examples)
class ParallelBackprop:
    """
    Alternative to BatchBackprop that yields the same results but utilizes
    multiprocessing to make use of more than one processor core.
    """

    def __init__(self, network, cost, workers=4):
        self.backprop = BatchBackprop(network, cost)
        self.workers = workers
        self.pool = multiprocessing.Pool(self.workers)

    def __call__(self, weights, examples):
        """
        Return the average gradient of the examples, computed by the pool.

        The examples are split into at most `workers` batches, and the batch
        gradients are combined weighted by the relative batch sizes.
        """
        batch_size = int(math.ceil(len(examples) / self.workers))
        batches = list(batched(examples, batch_size))
        # Relative size of every batch; only the last one may be smaller.
        sizes = [len(x) / batch_size for x in batches]
        total = sum(sizes)
        sizes = [x / total for x in sizes]
        assert len(batches) <= self.workers
        # The normalized sizes are floats, so their sum may differ from one
        # by a rounding error; an exact comparison could fail spuriously.
        assert math.isclose(sum(sizes), 1)
        compute = functools.partial(self.backprop, weights)
        gradients = self.pool.map(compute, batches)
        return sum(x * y for x, y in zip(gradients, sizes))
plot.py
画图工具
# pylint: disable=wrong-import-position
import collections
import time
import warnings
import inspect
import threading
import matplotlib
# Don't call the code if Sphinx inspects the file mocking external imports.
if inspect.ismodule(matplotlib): # noqa
    # On Mac force backend that works with threading.
    if matplotlib.get_backend() == 'MacOSX':
        matplotlib.use('TkAgg')
    # Hide matplotlib deprecation message.
    # NOTE(review): matplotlib.cbook.mplDeprecation may no longer exist in
    # recent matplotlib releases - confirm against the installed version.
    warnings.filterwarnings('ignore', category=matplotlib.cbook.mplDeprecation)
    # Ensure available interactive backend.
    if matplotlib.get_backend() not in matplotlib.rcsetup.interactive_bk:
        print('No visual backend available. Maybe you are inside a virtualenv '
              'that was created without --system-site-packages.')
    # NOTE(review): the nesting of this import was reconstructed from
    # flattened source - confirm that it belongs inside the guard.
    import matplotlib.pyplot as plt
class Interface:
    """
    Data and labelling of one plot, independent of how it is displayed.

    Title, axis labels and style are read-only; `xdata`, `ydata`, `width`
    and `height` are plain attributes.
    """

    def __init__(self, title='', xlabel='', ylabel='', style=None):
        # Fall back to an empty style if none is given.
        self._style = style or {}
        self._title, self._xlabel, self._ylabel = title, xlabel, ylabel
        self.xdata, self.ydata = [], []
        self.width = self.height = 0

    @property
    def style(self):
        """Keyword arguments describing the line style."""
        return self._style

    @property
    def title(self):
        """Title of the plot."""
        return self._title

    @property
    def xlabel(self):
        """Label of the x axis."""
        return self._xlabel

    @property
    def ylabel(self):
        """Label of the y axis."""
        return self._ylabel
class State:
    """Mutable holder of a single `running` flag, initially false."""

    def __init__(self):
        self.running = False
class Window:
    """
    Matplotlib figure that redraws its registered interfaces periodically.

    The figure is updated on the main thread while the actual work runs in
    a separate thread, see `start()`.
    """

    def __init__(self, refresh=0.5):
        # Intended time between two redraws in seconds.
        self.refresh = refresh
        self.thread = None
        self.state = State()
        self.figure = plt.figure()
        self.interfaces = []
        plt.ion()
        plt.show()

    def register(self, position, interface):
        """Add a subplot at `position` that displays `interface`."""
        axis = self.figure.add_subplot(
            position, title=interface.title,
            xlabel=interface.xlabel, ylabel=interface.ylabel)
        axis.get_xaxis().set_ticks([])
        # plot() returns a list holding the single line that was created.
        line, = axis.plot(interface.xdata, interface.ydata, **interface.style)
        self.interfaces.append((axis, line, interface))

    def start(self, work):
        """
        Hand the main thread to the window and continue work in the provided
        function. A state is passed as the first argument that contains a
        `running` flag. The function is expected to exit if the flag becomes
        false. The flag can also be set to false to stop the window event loop
        and continue in the main thread after the `start()` call.
        """
        assert threading.current_thread() == threading.main_thread()
        assert not self.state.running
        self.state.running = True
        self.thread = threading.Thread(target=work, args=(self.state,))
        self.thread.start()
        while self.state.running:
            try:
                started = time.time()
                self.update()
                elapsed = time.time() - started
                # Wait for the rest of the refresh interval, at least 1 ms.
                plt.pause(max(0.001, self.refresh - elapsed))
            except KeyboardInterrupt:
                self.state.running = False
        # Wait for the worker before handing back control.
        self.thread.join()

    def stop(self):
        """
        Close the window and stops the worker thread. The main thread will
        resume with the next command after the `start()` call.
        """
        assert threading.current_thread() == self.thread
        assert self.state.running
        self.state.running = False

    def update(self):
        """
        Redraw the figure to show changed data. This is automatically called
        after `start()` was run.
        """
        assert threading.current_thread() == threading.main_thread()
        for axis, line, interface in self.interfaces:
            line.set_xdata(interface.xdata)
            line.set_ydata(interface.ydata)
            # Avoid empty axes while no size is known yet.
            axis.set_xlim(0, interface.width or 1, emit=False)
            axis.set_ylim(0, interface.height or 1, emit=False)
        self.figure.canvas.draw()
class Plot(Interface):
    """
    Line plot that grows with the values passed to it.

    With `fixed` set, only that many most recent values are kept.
    """

    def __init__(self, title, xlabel, ylabel, style=None, fixed=None):
        # pylint: disable=too-many-arguments, redefined-variable-type
        super().__init__(title, xlabel, ylabel, style or {})
        # Largest value seen so far, used to scale the y axis.
        self.max_ = 0
        if fixed:
            # Fixed window: the deque drops the oldest values automatically.
            self.xdata = list(range(fixed))
            self.ydata = collections.deque([None] * fixed, maxlen=fixed)
            self.width = fixed
        else:
            self.xdata = []
            self.ydata = []

    def __call__(self, values):
        """Append the values and adjust the visible range of the plot."""
        self.ydata += values
        self.max_ = max(self.max_, *values)
        # Leave some space above the largest value.
        self.height = 1.05 * self.max_
        # Extend the x axis to one position per value.
        while len(self.xdata) < len(self.ydata):
            self.xdata.append(len(self.xdata))
        self.width = len(self.xdata) - 1
        assert len(self.xdata) == len(self.ydata)
optimization.py
一些优化算法,主要是基于梯度
class GradientDecent:
    """
    Adapt the weights in the opposite direction of the gradient to reduce the
    error.
    """

    def __call__(self, weights, gradient, learning_rate=0.1):
        step = learning_rate * gradient
        return weights - step
class Momentum:
    """
    Slow down changes of direction in the gradient by aggregating previous
    values of the gradient and multiplying them in.
    """

    def __init__(self):
        # The gradient returned by the previous call, if any.
        self.previous = None

    def __call__(self, gradient, rate=0.9):
        # Work on a copy so that the caller's gradient stays untouched.
        smoothed = gradient.copy()
        if self.previous is not None:
            assert self.previous.shape == smoothed.shape
            smoothed += rate * self.previous
        # Remember an independent copy for the next call.
        self.previous = smoothed.copy()
        return smoothed
class WeightDecay:
    """
    Regularizer that shrinks every weight slightly towards zero, which can
    help the model to find simpler solutions.
    """

    def __call__(self, weights, rate=1e-4):
        """Return ``weights`` scaled by ``1 - rate``."""
        keep = 1 - rate
        return keep * weights
class WeightTying:
    """
    Constrain groups of slices to share the same values by replacing every
    slice of a group with the group's average. Should be applied to the
    initial weights and to each gradient.
    """

    def __init__(self, *groups):
        """
        Args:
            *groups: Each group is a non-empty sized collection of index
                tuples. Every tuple starts with an int and consists only of
                ints and slices.
        """
        for members in groups:
            assert members and hasattr(members, '__len__')
            assert all([isinstance(index[0], int) for index in members])
            assert all([
                isinstance(part, (slice, int))
                for index in members for part in index])
        self.groups = groups

    def __call__(self, matrices):
        """
        Return a copy of ``matrices`` in which all slices of each group hold
        that group's average. The argument itself is left untouched.
        """
        tied = matrices.copy()
        for members in self.groups:
            slices = [tied[index] for index in members]
            reference = slices[0].shape
            assert all([part.shape == reference for part in slices]), (
                'All slices within a group must have the same shape. '
                'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.')
            # The average is computed before anything is written back.
            mean = sum(slices) / len(slices)
            assert mean.shape == reference
            for index in members:
                tied[index] = mean
        return tied
trainer.py
训练器:负责初始化网络和权重、按批次训练、定期在测试集上评估,并可选地保存权重和可视化训练过程。
import functools
import numpy as np
from layered.gradient import BatchBackprop, CheckedBackprop
from layered.network import Network, Matrices
from layered.optimization import (
GradientDecent, Momentum, WeightDecay, WeightTying)
from layered.utility import repeated, batched
from layered.evaluation import compute_costs, compute_error
class Trainer:
    """
    Train the network described by a problem definition.

    The trainer builds the network and its weights, wires up backpropagation
    and the optimization steps (momentum, weight tying, gradient descent and
    weight decay) and then iterates over mini batches of the training data.
    It periodically evaluates on the test data, and optionally saves the
    weights and plots the progress.
    """
    # pylint: disable=attribute-defined-outside-init, too-many-arguments

    def __init__(self, problem, load=None, save=None,
                 visual=False, check=False):
        """
        Args:
            problem: Problem definition. The trainer reads its ``layers``,
                ``cost``, ``dataset``, ``epochs``, ``batch_size``,
                ``learning_rate``, ``momentum``, ``weight_decay``,
                ``weight_mean``, ``weight_scale``, ``weight_tying`` and
                ``evaluate_every`` attributes.
            load: Optional path handed to ``np.load`` to initialize weights.
            save: Optional path handed to ``np.save`` at each evaluation.
            visual: Whether to plot training costs and test errors.
            check: Whether to use ``CheckedBackprop`` instead of
                ``BatchBackprop``; batches must then hold a single example.
        """
        self.problem = problem
        self.load = load
        self.save = save
        self.visual = visual
        self.check = check
        self._init_network()
        self._init_training()
        self._init_visualize()

    def _init_network(self):
        """Define model and initialize weights."""
        self.network = Network(self.problem.layers)
        self.weights = Matrices(self.network.shapes)
        if self.load:
            loaded = np.load(self.load)
            assert loaded.shape == self.weights.shape, (
                'weights to load must match problem definition')
            self.weights.flat = loaded
        else:
            # Draw random initial weights from a normal distribution.
            self.weights.flat = np.random.normal(
                self.problem.weight_mean, self.problem.weight_scale,
                len(self.weights.flat))

    def _init_training(self):
        # pylint: disable=redefined-variable-type
        """Classes needed during training."""
        if self.check:
            self.backprop = CheckedBackprop(self.network, self.problem.cost)
        else:
            self.backprop = BatchBackprop(self.network, self.problem.cost)
        self.momentum = Momentum()
        self.decent = GradientDecent()
        self.decay = WeightDecay()
        self.tying = WeightTying(*self.problem.weight_tying)
        # Tied slices must already agree before the first update.
        self.weights = self.tying(self.weights)

    def _init_visualize(self):
        """Create the plots and the window when visualization is enabled."""
        if not self.visual:
            return
        # Imported lazily so plotting is only required in visual mode.
        from layered.plot import Window, Plot
        self.plot_training = Plot(
            'Training', 'Examples', 'Cost', fixed=1000,
            style={'linestyle': '', 'marker': '.'})
        self.plot_testing = Plot('Testing', 'Time', 'Error')
        self.window = Window()
        self.window.register(211, self.plot_training)
        self.window.register(212, self.plot_testing)

    def __call__(self):
        """Train the model and visualize progress."""
        print('Start training')
        repeats = repeated(self.problem.dataset.training, self.problem.epochs)
        batches = batched(repeats, self.problem.batch_size)
        if self.visual:
            # The window invokes the partial and supplies the `state`
            # argument of `_train_visual`.
            self.window.start(functools.partial(self._train_visual, batches))
        else:
            self._train(batches)

    def _train(self, batches):
        """
        Process all batches, stopping quietly on Ctrl-C.

        The whole loop is guarded so that an interrupt is reported as
        'Aborted' whether it arrives while a batch is being processed or
        while the next batch is being produced.
        """
        try:
            for index, batch in enumerate(batches):
                self._batch(index, batch)
        except KeyboardInterrupt:
            print('\nAborted')
            return
        print('Done')

    def _train_visual(self, batches, state):
        """
        Process all batches until ``state.running`` is cleared.

        After the last batch, waits for user input and then clears
        ``state.running`` itself.
        """
        for index, batch in enumerate(batches):
            if not state.running:
                print('\nAborted')
                return
            self._batch(index, batch)
        print('Done')
        input('Press any key to close window')
        state.running = False

    def _batch(self, index, batch):
        """Perform one weight update on ``batch``, then plot and evaluate."""
        if self.check:
            # Checked backprop receives a single example, not a batch.
            assert len(batch) == 1
            gradient = self.backprop(self.weights, batch[0])
        else:
            gradient = self.backprop(self.weights, batch)
        gradient = self.momentum(gradient, self.problem.momentum)
        gradient = self.tying(gradient)
        self.weights = self.decent(
            self.weights, gradient, self.problem.learning_rate)
        self.weights = self.decay(self.weights, self.problem.weight_decay)
        self._visualize(batch)
        self._evaluate(index)

    def _visualize(self, batch):
        """Add the costs of ``batch`` to the training plot in visual mode."""
        if not self.visual:
            return
        costs = compute_costs(
            self.network, self.weights, self.problem.cost, batch)
        self.plot_training(costs)

    def _evaluate(self, index):
        """
        Every ``problem.evaluate_every`` examples: save the weights if a
        save path was given, print the test error and plot it in visual mode.
        """
        if not self._every(self.problem.evaluate_every,
                           self.problem.batch_size, index):
            return
        if self.save:
            np.save(self.save, self.weights)
        error = compute_error(
            self.network, self.weights, self.problem.dataset.testing)
        print('Batch {} test error {:.2f}%'.format(index, 100 * error))
        if self.visual:
            self.plot_testing([error])

    @staticmethod
    def _every(times, step_size, index):
        """
        Given a loop over batches of an iterable and an operation that should
        be performed every few elements. Determine whether the operation should
        be called for the current index.

        Args:
            times: Number of elements between two runs of the operation.
            step_size: Number of elements per batch.
            index: Zero-based index of the current batch.

        Returns:
            bool: True when the number of elements consumed so far is
            non-zero and lies within one batch after a multiple of ``times``.
        """
        current = index * step_size
        # Largest multiple of `times` that does not exceed `current`.
        step = current // times * times
        reached = current >= step
        overshot = current >= step + step_size
        # Wrapped in bool() so index 0 yields False rather than the int 0.
        return bool(current and reached and not overshot)
main.py
import os
import argparse
from layered.problem import Problem
from layered.trainer import Trainer
def main():
    """
    Command line entry point: parse the arguments, load the problem
    definition and run a trainer on it.
    """
    # Positional names/flags and keyword settings of every option, in the
    # order they are registered with the parser.
    options = (
        (('problem',),
         {'help': 'path to the YAML problem definition'}),
        (('-v', '--visual'),
         {'action': 'store_true',
          'help': 'show a diagram of training costs'}),
        (('-l', '--load'),
         {'default': None,
          'help': 'path to load the weights from at startup'}),
        (('-s', '--save'),
         {'default': None,
          'help': 'path to dump the learned weights at each evaluation'}),
        (('-c', '--check'),
         {'action': 'store_true',
          'help': 'whether to activate gradient checking'}),
    )
    parser = argparse.ArgumentParser('layered')
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    # Only the file name of the problem definition is echoed.
    _, problem_name = os.path.split(args.problem)
    print('Problem', problem_name)

    problem = Problem(args.problem)
    run_training = Trainer(
        problem, args.load, args.save, args.visual, args.check)
    run_training()


# Run the command line interface when executed as a script.
if __name__ == '__main__':
    main()
这就好了