Implementing Basic AND, OR, and XOR Logic with Neural Networks

The OR, AND, and XOR logic operations are standard computer-science material, so we won't go over them in detail here.

The basic truth tables are:

	x1  x2 |  OR  AND  XOR
	-----------------------
	 0   0 |   0    0    0
	 0   1 |   1    0    1
	 1   0 |   1    0    1
	 1   1 |   1    1    0

Neural networks reached an early peak by solving linearly separable problems. But a few years later, Minsky and Papert demonstrated, in a publication devoted to the point, that a single-layer network is powerless against the nonlinear XOR problem, and that critique plunged neural network research into a long winter. Only more than a decade later did multilayer networks, the multilayer perceptron (MLP), bring the field back into a period of continuous development.

We know that OR and AND are both linearly separable, whereas XOR is not, as a plot of the four input points makes clear:

(Figure: the four inputs of OR, AND, and XOR plotted in the plane, with the two output classes drawn as stars and circles.)

In the third plot (XOR), there is no way to draw a single straight line in the 2D plane that separates the stars from the circles.
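This is easy to verify algebraically. A single-layer perceptron computes step(w1*x1 + w2*x2 + b), so classifying XOR correctly would require all four of:

	(0,0) -> 0:  b <= 0
	(0,1) -> 1:  w2 + b > 0
	(1,0) -> 1:  w1 + b > 0
	(1,1) -> 0:  w1 + w2 + b <= 0

Adding the middle two inequalities gives w1 + w2 + 2b > 0, hence w1 + w2 + b > -b >= 0, which contradicts the last constraint. No choice of (w1, w2, b) can work.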

Next, we implement this with the original form of neural network: an input layer whose values are multiplied by their respective weights, summed, and offset by a bias to produce the output.

Code:

perceptron.py

import numpy as np

class Perceptron:
	def __init__(self, N, alpha=0.1):
		# N weights plus one extra entry for the bias (the "bias trick"),
		# scaled by 1/sqrt(N) to normalize the variance
		self.W = np.random.randn(N + 1) / np.sqrt(N)
		self.alpha = alpha

	def step(self, x):
		# Heaviside step activation
		return 1 if x > 0 else 0

	def fit(self, X, y, epochs=10):
		# append a column of ones so the bias is learned as an ordinary weight
		X = np.c_[X, np.ones((X.shape[0]))]

		for epoch in np.arange(0, epochs):
			for (x, target) in zip(X, y):
				p = self.step(np.dot(x, self.W))

				# perceptron rule: update the weights only on misclassified samples
				if p != target:
					error = p - target
					self.W += -self.alpha * error * x

	def predict(self, X, addBias=True):
		# intended for one sample at a time, since step expects a single value
		X = np.atleast_2d(X)

		if addBias:
			X = np.c_[X, np.ones((X.shape[0]))]

		return self.step(np.dot(X, self.W))


test.py

from perceptron import Perceptron
import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_or = np.array([[0], [1], [1], [1]])
y_and = np.array([[0], [0], [0], [1]])
y_xor = np.array([[0], [1], [1], [0]])

print("[INFO] training perceptron....")
p = Perceptron(X.shape[1], alpha=0.1)
p.fit(X, y_or, epochs=20)

print("[INFO] testing perceptron OR...")
for (x, target) in zip(X, y_or):
	pred = p.predict(x)
	print("[INFO] data={}, ground_truth={}, pred={}".format(x, target[0], pred))

print("[INFO] training perceptron AND....")
p = Perceptron(X.shape[1], alpha=0.1)
p.fit(X, y_and, epochs=20)

print("[INFO] testing perceptron AND...")
for (x, target) in zip(X, y_and):
	pred = p.predict(x)
	print("[INFO] data={}, ground_truth={}, pred={}".format(x, target[0], pred))

print("[INFO] training perceptron XOR....")
p = Perceptron(X.shape[1], alpha=0.1)
p.fit(X, y_xor, epochs=200)

print("[INFO] testing perceptron XOR...")
for (x, target) in zip(X, y_xor):
	pred = p.predict(x)
	print("[INFO] data={}, ground_truth={}, pred={}".format(x, target[0], pred))

print("X.shape\n", X.shape)
print("X.shape[0]\n", X.shape[0])
print("X.shape[1]\n", X.shape[1])
	

Result: the perceptron classifies OR and AND correctly, but its XOR predictions never match the targets.

As you can see, without a hidden layer the network simply cannot learn to classify XOR. So let's improve the network by adding hidden layers (here, just a single one) and see whether that solves the problem.

neuralnetwork.py

import numpy as np

# the full network structure is defined as a class
class NeuralNetwork:
	def __init__(self, layers, alpha=0.1):
		self.W = []
		self.layers = layers
		self.alpha = alpha
		# initialize a weight matrix for every layer connection except the last,
		# using the bias trick: an extra row/column of W absorbs the bias so
		# the biases are learned like ordinary weights
		for i in np.arange(0, len(layers) - 2):
			w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
			# scale by sqrt of the layer size to normalize the variance
			self.W.append(w / np.sqrt(layers[i]))
		# the last weight matrix takes a bias input but has no bias output
		w = np.random.randn(layers[-2] + 1, layers[-1])
		self.W.append(w / np.sqrt(layers[-2]))

	# magic method so print(nn) shows the architecture
	def __repr__(self):
		return "NeuralNetwork:{}".format("-".join(str(l) for l in self.layers))

	def sigmoid(self, x):
		return 1.0 / (1 + np.exp(-x))

	# derivative of the sigmoid; x is assumed to *already* be a sigmoid
	# output, so sigmoid'(net) = sigmoid(net) * (1 - sigmoid(net)) = x * (1 - x)
	def sigmoid_deriv(self, x):
		return x * (1 - x)

	def fit(self, X, y, epochs=1000, displayUpdate=100):
		# bias trick: append a column of ones to the input data
		X = np.c_[X, np.ones((X.shape[0]))]
		losses = []
		# one backpropagation pass (and weight update) per training sample
		for epoch in np.arange(0, epochs):
			for (x, target) in zip(X, y):
				self.fit_partial(x, target)
			# periodically record and display the loss
			if epoch == 0 or (epoch + 1) % displayUpdate == 0:
				loss = self.calculate_loss(X, y)
				losses.append(loss)
				print("[INFO] epoch={}, loss={:.7f}".format(epoch + 1, loss))
		return losses

	# forward pass followed by chain-rule backpropagation for one sample
	def fit_partial(self, x, y):
		# A stores every layer's activations, starting with the input itself
		A = [np.atleast_2d(x)]

		# forward pass
		for layer in np.arange(0, len(self.W)):
			net = A[layer].dot(self.W[layer])

			out = self.sigmoid(net)

			A.append(out)

		# backpropagation algorithm: start from the output error
		error = A[-1] - y

		D = [error * self.sigmoid_deriv(A[-1])]

		# propagate the deltas backwards through the hidden layers
		for layer in np.arange(len(A) - 2, 0, -1):
			delta = D[-1].dot(self.W[layer].T)
			delta = delta * self.sigmoid_deriv(A[layer])
			D.append(delta)

		# the deltas were collected back-to-front; reverse them
		D = D[::-1]
		# gradient descent update of every weight matrix
		for layer in np.arange(0, len(self.W)):
			self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

	def predict(self, X, addBias=True):
		p = np.atleast_2d(X)
		# bias trick, unless the ones column is already present
		if addBias:
			p = np.c_[p, np.ones((p.shape[0]))]
		# ordinary forward pass to obtain the predicted output
		for layer in np.arange(0, len(self.W)):
			p = self.sigmoid(np.dot(p, self.W[layer]))

		return p

	# compute the loss: 0.5 * sum of squared errors (an SSE, not a mean)
	def calculate_loss(self, X, targets):
		targets = np.atleast_2d(targets)
		predictions = self.predict(X, addBias=False)
		loss = 0.5 * np.sum((predictions - targets) ** 2)

		return loss


if __name__ == '__main__':
	nn = NeuralNetwork([2, 2, 1])
	print(nn)
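For reference, the chain rule that fit_partial implements can be written compactly in the code's own notation, where A[l] holds layer l's activations and s is the sigmoid:

	error = A[-1] - y                         # output error
	D[-1] = error * s'(A[-1])                 # output-layer delta
	D[l]  = (D[l+1] @ W[l].T) * s'(A[l])      # hidden deltas, back to front
	W[l] += -alpha * (A[l].T @ D[l])          # gradient descent update

with s'(a) = a * (1 - a), since the stored activations have already passed through the sigmoid.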

test.py

from neuralnetwork import NeuralNetwork
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# the training data: all four 2-bit input combinations
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_or = np.array([[0], [1], [1], [1]])
y_and = np.array([[0], [0], [0], [1]])
y_xor = np.array([[0], [1], [1], [0]])
# build a 2-2-1 network: a 2-node input layer, a 2-node hidden layer, a 1-node output layer
nn = NeuralNetwork([2, 2, 1], alpha=0.5)
# train the model, iteratively updating the weight matrices
losses = nn.fit(X, y_xor, epochs=2000000)
# print the predictions
for (x, target) in zip(X, y_xor):
	pred = nn.predict(x)[0][0]
	step = 1 if pred > 0.5 else 0
	print("[INFO] data={}, ground_truth={}, pred={:.4f}, step={}"
		.format(x, target[0], pred, step))

# visualize the training data
plt.style.use("ggplot")
plt.figure()
plt.title("Data")
cm_dark = mpl.colors.ListedColormap(['g', 'b'])
plt.scatter(X[:, 0], X[:, 1], marker="o", c=y_xor.ravel(), cmap=cm_dark, s=80)

# visualize the training loss
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, len(losses)), losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

print("W\n", nn.W)

Result:

Excellent: with a single hidden layer added, the network solves the nonlinear XOR problem nicely.
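To see what the hidden layer buys us, one can evaluate the trained network over a dense grid of points. A minimal sketch, assuming nn, X, y_xor, cm_dark, and plt from the script above are still in scope:

import numpy as np

# evaluate the trained 2-2-1 network over a grid to expose its decision regions
xx, yy = np.meshgrid(np.linspace(-0.5, 1.5, 200), np.linspace(-0.5, 1.5, 200))
grid = np.c_[xx.ravel(), yy.ravel()]
zz = nn.predict(grid).reshape(xx.shape)

plt.figure()
plt.contourf(xx, yy, zz > 0.5, alpha=0.3)   # two regions with a curved boundary
plt.scatter(X[:, 0], X[:, 1], c=y_xor.ravel(), cmap=cm_dark, s=80)
plt.title("XOR decision regions")
plt.show()

The boundary is visibly nonlinear, which is exactly what the single-layer perceptron could not produce.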

Of course, we can also define the network with the earlier structure that has no hidden layer:

test.py (identical to the script above, except the network definition drops the hidden layer):

# build a 2-1 network: a 2-node input layer wired directly to a 1-node output layer
nn = NeuralNetwork([2, 1], alpha=0.5)

Result: without the hidden layer, the loss plateaus and the network never classifies XOR correctly.

Now let's apply this MLP to the MNIST dataset and see how it does:

testMNIST.py

from neuralnetwork import NeuralNetwork
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
# load the digits dataset bundled with sklearn: a scaled-down MNIST
# containing only 1797 8x8 images
print("[INFO] loading mnist dataset...")
digits = datasets.load_digits()
data = digits.data.astype("float")
# normalize pixel values to the range [0, 1]
data = (data - data.min()) / (data.max() - data.min())
print("[INFO] samples:{}, dim:{}".format(data.shape[0], data.shape[1]))
# 75% of the samples for training, 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, digits.target, test_size=0.25)

print("trainY:\n", trainY)
print("testY:\n", testY)
# one-hot encode the labels, e.g. 0 -> [1,0,0,0,0,0,0,0,0,0],
# 1 -> [0,1,0,0,0,0,0,0,0,0], ..., 9 -> [0,0,0,0,0,0,0,0,0,1]
trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)

print("Vectorized trainY:\n", trainY)
print("testY[0]:\n", testY[0])
print("Vectorized testY:\n", testY)
# define a 64-32-32-16-10 network: 64 input nodes (the 8x8 = 64 pixels)
# and 10 output nodes (one per digit 0-9)
print("[INFO] training network...")
nn = NeuralNetwork([trainX.shape[1], 32, 32, 16, 10])
print("[INFO] {}".format(nn))

# train the model
losses = nn.fit(trainX, trainY, epochs=5000)
# evaluate and produce a classification report
print("[INFO] evaluating network...")
predictions = nn.predict(testX)
predictions = predictions.argmax(axis=1)
print(classification_report(testY.argmax(axis=1), predictions))

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, len(losses)), losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

print("W\n", nn.W)

Result:

An average accuracy of 97% is quite respectable.
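If you want to see which digits get confused with which, sklearn's confusion_matrix can be appended to the script; a small optional addition, not in the original:

from sklearn.metrics import confusion_matrix

# rows are true digits, columns are predicted digits;
# off-diagonal entries count the misclassifications
print(confusion_matrix(testY.argmax(axis=1), predictions))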

 

Finally, here is a simpler, self-contained Python implementation of a BP (backpropagation) neural network that solves the XOR problem:

import numpy as np

# the sigmoid function and its derivative (again taking the already-activated value)
def sigmoid(x):
	return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
	return x * (1 - x)

# build the network
class NeuralNetwork:
	def __init__(self, x, y):
		self.input = x
		self.weights1 = np.random.rand(self.input.shape[1], 4)
		self.weights2 = np.random.rand(4, 1)
		self.y = y
		self.output = np.zeros(self.y.shape)

	def feedforward(self):
		self.layer1 = sigmoid(np.dot(self.input, self.weights1))
		self.output = sigmoid(np.dot(self.layer1, self.weights2))

	def backprop(self):
		# apply the chain rule to get the output-layer gradient
		d_weights2 = np.dot(self.layer1.T, (2 * (self.y - self.output) * sigmoid_derivative(self.output)))
		# apply the chain rule to get the hidden-layer gradient
		d_weights1 = np.dot(self.input.T, (np.dot(2 * (self.y - self.output) * sigmoid_derivative(self.output), self.weights2.T) * sigmoid_derivative(self.layer1)))
		# update the weights
		self.weights1 += d_weights1
		self.weights2 += d_weights2

# input data and labels
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# initialize the network
nn = NeuralNetwork(X, y)

# train the network
for i in range(1500):
	nn.feedforward()
	nn.backprop()

# print the trained outputs
print(nn.output)

This code implements a BP neural network with one hidden layer for solving XOR. The network uses two weight matrices, weights1 and weights2, connecting the input layer to the hidden layer and the hidden layer to the output layer. Training alternates forward propagation, which computes the network's output, with backpropagation, which updates the weight matrices to minimize the error. The result is a network that correctly predicts the XOR outputs.
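To turn the sigmoid outputs into binary XOR predictions, one can threshold them at 0.5; a one-line addition, not part of the snippet above:

print((nn.output > 0.5).astype(int))  # expected: [[0], [1], [1], [0]]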