The results in the previous section show that a neural network without a hidden layer cannot correctly classify the XOR samples, so a hidden layer has to be added (a quick check of this claim appears at the end of this post).
This example requires creating two new files in the same folder.
1. neuralnetwork.py
# import the necessary packages
import numpy as np

class NeuralNetwork:
    def __init__(self, layers, alpha=0.1):
        # initialize the list of weights matrices, then store the
        # network architecture and learning rate
        self.W = []
        self.layers = layers
        self.alpha = alpha

        # start looping from the index of the first layer but
        # stop before we reach the last two layers
        for i in np.arange(0, len(layers) - 2):
            # randomly initialize a weight matrix connecting the
            # number of nodes in each respective layer together,
            # adding an extra node for the bias
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))

        # the last two layers are a special case where the input
        # connections need a bias term but the output does not
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        # construct and return a string that represents the network
        # architecture
        return "NeuralNetwork: {}".format(
            "-".join(str(l) for l in self.layers))

    def sigmoid(self, x):
        # compute and return the sigmoid activation value for a
        # given input value
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        # compute the derivative of the sigmoid function ASSUMING
        # that 'x' has already been passed through the 'sigmoid'
        # function
        return x * (1 - x)

    def fit(self, X, y, epochs=1000, displayUpdate=100):
        # insert a column of 1's as the last entry in the feature
        # matrix -- this little trick allows us to treat the bias
        # as a trainable parameter within the weight matrix
        X = np.c_[X, np.ones((X.shape[0]))]

        # loop over the desired number of epochs
        for epoch in np.arange(0, epochs):
            # loop over each individual data point and train
            # our network on it
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)

            # check to see if we should display a training update
            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(X, y)
                print("[INFO] epoch={}, loss={:.7f}".format(
                    epoch + 1, loss))

    def fit_partial(self, x, y):
        # construct our list of output activations for each layer
        # as our data point flows through the network; the first
        # activation is a special case -- it's just the input
        # feature vector itself
        A = [np.atleast_2d(x)]

        # FEEDFORWARD:
        # loop over the layers in the network
        for layer in np.arange(0, len(self.W)):
            # feedforward the activation at the current layer by
            # taking the dot product between the activation and
            # the weight matrix -- this is called the "net input"
            # to the current layer
            net = A[layer].dot(self.W[layer])

            # computing the "net output" is simply applying our
            # nonlinear activation function to the net input
            out = self.sigmoid(net)

            # once we have the net output, add it to our list of
            # activations
            A.append(out)

        # BACKPROPAGATION
        # the first phase of backpropagation is to compute the
        # difference between our *prediction* (the final output
        # activation in the activations list) and the true target
        # value
        error = A[-1] - y

        # from here, we need to apply the chain rule and build our
        # list of deltas 'D'; the first entry in the deltas is
        # simply the error of the output layer times the derivative
        # of our activation function for the output value
        D = [error * self.sigmoid_deriv(A[-1])]

        # once you understand the chain rule it becomes super easy
        # to implement with a 'for' loop -- simply loop over the
        # layers in reverse order (ignoring the last two since we
        # already have taken them into account)
        for layer in np.arange(len(A) - 2, 0, -1):
            # the delta for the current layer is equal to the delta
            # of the *previous layer* dotted with the weight matrix
            # of the current layer, followed by multiplying the delta
            # by the derivative of the nonlinear activation function
            # for the activations of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # since we looped over our layers in reverse order we need to
        # reverse the deltas
        D = D[::-1]

        # WEIGHT UPDATE PHASE
        # loop over the layers
        for layer in np.arange(0, len(self.W)):
            # update our weights by taking the dot product of the layer
            # activations with their respective deltas, then multiplying
            # this value by some small learning rate and adding to our
            # weight matrix -- this is where the actual "learning" takes
            # place
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def predict(self, X, addBias=True):
        # initialize the output prediction as the input features -- this
        # value will be (forward) propagated through the network to
        # obtain the final prediction
        p = np.atleast_2d(X)

        # check to see if the bias column should be added
        if addBias:
            # insert a column of 1's as the last entry in the feature
            # matrix (bias)
            p = np.c_[p, np.ones((p.shape[0]))]

        # loop over our layers in the network
        for layer in np.arange(0, len(self.W)):
            # computing the output prediction is as simple as taking
            # the dot product between the current activation value 'p'
            # and the weight matrix associated with the current layer,
            # then passing this value through a nonlinear activation
            # function
            p = self.sigmoid(np.dot(p, self.W[layer]))

        # return the predicted value
        return p

    def calculate_loss(self, X, targets):
        # make predictions for the input data points then compute
        # the loss
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)

        # return the loss
        return loss
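One detail in neuralnetwork.py that is easy to miss: sigmoid_deriv expects a value that has *already* been passed through sigmoid, because the sigmoid's derivative can be written in terms of its output, sigma'(net) = sigma(net) * (1 - sigma(net)). Below is a minimal check of that identity, assuming only numpy; the net input 0.7 is an arbitrary choice of mine:

# verify sigmoid_deriv's convention against the true derivative:
# sigma'(net) = sigma(net) * (1 - sigma(net))
import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

net = 0.7            # an arbitrary net input
out = sigmoid(net)   # the "net output", i.e. what fit_partial stores in A

# what sigmoid_deriv(out) computes in the class above
analytic = out * (1 - out)

# central finite-difference approximation of d(sigmoid)/d(net)
eps = 1e-6
numeric = (sigmoid(net + eps) - sigmoid(net - eps)) / (2 * eps)

print(analytic, numeric)  # both approximately 0.2217

This is why fit_partial calls sigmoid_deriv on the stored activations in A rather than on the net inputs: the activation is exactly the sigmoid output the formula needs.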
This was the first time I had encountered the __repr__ method. Its purpose is to define the string representation of the object, so that

nn = NeuralNetwork([2, 2, 1])  # build a network with a 2-2-1 architecture
print(nn)                      # print nn

outputs

NeuralNetwork: 2-2-1

Without __repr__, print(nn) would fall back to Python's default representation, something like <neuralnetwork.NeuralNetwork object at 0x...>.
The weights are updated with the backpropagation algorithm. My understanding of it is still limited and I have not fully grasped it yet; I will study it properly when I need it again.
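Even without the full derivation, one property of the algorithm is easy to test: a single call to fit_partial performs one gradient-descent step, so it should lower the loss on that sample. A minimal sanity-check sketch of my own, assuming neuralnetwork.py above is importable; the seed and the sample are arbitrary:

# one backpropagation step should reduce the loss on the training sample
from neuralnetwork import NeuralNetwork
import numpy as np

np.random.seed(7)
nn = NeuralNetwork([2, 2, 1], alpha=0.1)

# append the bias column manually, exactly as fit() does internally
X = np.c_[np.array([[0, 1]]), np.ones((1,))]
y = np.array([[1]])

before = nn.calculate_loss(X, y)
nn.fit_partial(X[0], y[0])
after = nn.calculate_loss(X, y)
print(before, after)  # 'after' should be smaller than 'before'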
2. nn_xor.py
# import the necessary packages
from neuralnetwork import NeuralNetwork
import numpy as np
# construct the XOR dataset
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
# define our 2-2-1 neural network and train it
nn = NeuralNetwork([2, 2, 1], alpha=0.5)
nn.fit(X, y, epochs=20000)
# now that our network is trained, loop over the XOR data points
for (x, target) in zip(X, y):
    # make a prediction on the data point and display the result
    # to our console
    pred = nn.predict(x)[0][0]
    step = 1 if pred > 0.5 else 0
    print("[INFO] data={}, ground-truth={}, pred={:.4f}, step={}".format(
        x, target[0], pred, step))
Running nn_xor.py produces the following output:
[INFO] epoch=1, loss=0.5110628
[INFO] epoch=100, loss=0.4998075
[INFO] epoch=200, loss=0.4996948
[INFO] epoch=300, loss=0.4995282
[INFO] epoch=400, loss=0.4992713
[INFO] epoch=500, loss=0.4988583
[INFO] epoch=600, loss=0.4981644
...
[INFO] epoch=19300, loss=0.0002479
[INFO] epoch=19400, loss=0.0002461
[INFO] epoch=19500, loss=0.0002444
[INFO] epoch=19600, loss=0.0002426
[INFO] epoch=19700, loss=0.0002409
[INFO] epoch=19800, loss=0.0002393
[INFO] epoch=19900, loss=0.0002376
[INFO] epoch=20000, loss=0.0002360
[INFO] data=[0 0], ground-truth=0, pred=0.0079, step=0
[INFO] data=[0 1], ground-truth=1, pred=0.9889, step=1
[INFO] data=[1 0], ground-truth=1, pred=0.9890, step=1
[INFO] data=[1 1], ground-truth=0, pred=0.0129, step=0
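To connect back to the opening claim, the same script can be rerun with the hidden layer removed. This is a hypothetical variation of my own (not from the book), assuming neuralnetwork.py is unchanged; a 2-1 network is just a linear model passed through a sigmoid, so it cannot separate the XOR classes and its predictions should stay close to 0.5:

# the same experiment without a hidden layer -- a 2-1 architecture
from neuralnetwork import NeuralNetwork
import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

nn = NeuralNetwork([2, 1], alpha=0.5)
nn.fit(X, y, epochs=20000)

# with no hidden layer there is no way to carve out the two XOR
# regions, so expect every prediction to hover near 0.5
for (x, target) in zip(X, y):
    pred = nn.predict(x)[0][0]
    print("[INFO] data={}, ground-truth={}, pred={:.4f}".format(
        x, target[0], pred))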
The code above comes from Chapter 10 of Deep Learning for Computer Vision with Python (Starter Bundle).