#!/usr/bin/env python3
# coding=utf-8
import numpy as np
import math
import random
# Hyperparameters
learningRate = 0.1
epochs = 1
iterate = 100000
inputNeuronsNum = 2
hiddenNeuronsNum = 2
outputNeuronsNum = 1
activation_mode = "relu"  # one of three activations: sigmoid | tanh | relu
class MLP():
    """A minimal 2-2-1 MLP that learns XOR."""
    def __init__(self):
        super(MLP, self).__init__()
        # Weights and biases are initialized uniformly from [0, 1)
        self.input2HiddenWeight = np.random.random((hiddenNeuronsNum, inputNeuronsNum))
        self.hidden2OutputWeight = np.random.random((outputNeuronsNum, hiddenNeuronsNum))
        self.input2HiddenBias = np.random.random((hiddenNeuronsNum, 1))
        self.hidden2OutputBias = np.random.random((outputNeuronsNum, 1))
        self.activation_v = np.vectorize(self.activation)  # vectorize the activation function
    def activation(self, x):
        if activation_mode == "sigmoid":
            return 1 / (1 + math.exp(-x))
        elif activation_mode == "tanh":
            return np.tanh(x)
        elif activation_mode == "relu":
            return np.maximum(x, 0)
        else:  # identity
            return x

    def activation_derivative(self, x):
        # Note: x here is the already-activated output, so the sigmoid/tanh
        # derivatives are expressed in terms of the activation value itself.
        if activation_mode == "sigmoid":
            return np.multiply(x, 1 - x)
        elif activation_mode == "tanh":
            return 1 - np.multiply(x, x)
        elif activation_mode == "relu":
            grad = np.array(x, copy=True)
            grad[x > 0] = 1.
            grad[x <= 0] = 0.
            return grad
        else:  # identity: derivative is 1 everywhere
            return np.ones_like(x)
    def backpropagation(self, X, Y):
        # XOR target: 0 when the inputs match, 1 otherwise
        if X == Y:
            target = np.array([[0.0]])
        else:
            target = np.array([[1.0]])
        # Feed-forward
        self.inputLayer = np.array([[X], [Y]], dtype=float)
        self.hiddenLayer = self.input2HiddenWeight.dot(self.inputLayer)
        self.hiddenLayer = self.activation_v(self.hiddenLayer + self.input2HiddenBias)
        self.outputLayer = self.hidden2OutputWeight.dot(self.hiddenLayer)
        self.outputLayer = self.activation_v(self.outputLayer + self.hidden2OutputBias)
        # Backpropagation
        outputErrors = target - self.outputLayer
        outputGradient = self.activation_derivative(self.outputLayer)
        hiddenGradient = self.activation_derivative(self.hiddenLayer)
        hiddenErrors = self.hidden2OutputWeight.T.dot(outputErrors)
        hidden2OutputDeltaWeight = (learningRate * np.multiply(outputErrors, outputGradient)).dot(self.hiddenLayer.T)
        input2HiddenDeltaWeight = (learningRate * np.multiply(hiddenErrors, hiddenGradient)).dot(self.inputLayer.T)
        input2HiddenDeltaBias = learningRate * np.multiply(hiddenErrors, hiddenGradient)
        hidden2OutputDeltaBias = learningRate * np.multiply(outputErrors, outputGradient)
        # Update weights and biases
        self.hidden2OutputWeight += hidden2OutputDeltaWeight
        self.input2HiddenWeight += input2HiddenDeltaWeight
        self.input2HiddenBias += input2HiddenDeltaBias
        self.hidden2OutputBias += hidden2OutputDeltaBias
    def training(self):
        print("Training...")
        for _ in range(epochs):
            for i in range(iterate):
                X = random.randrange(0, 2)
                Y = random.randrange(0, 2)
                self.backpropagation(X, Y)
        print("Training done")

    def predict(self, X, Y):
        print("Network prediction...")
        self.inputLayer = np.array([[X], [Y]], dtype=float)
        self.hiddenLayer = self.input2HiddenWeight.dot(self.inputLayer)
        self.hiddenLayer = self.activation_v(self.hiddenLayer + self.input2HiddenBias)
        self.outputLayer = self.hidden2OutputWeight.dot(self.hiddenLayer)
        self.outputLayer = self.activation_v(self.outputLayer + self.hidden2OutputBias)
        print(self.outputLayer)
if __name__ == '__main__':
    mlp = MLP()
    X = 1
    Y = 0
    mlp.training()
    mlp.predict(X, Y)
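To sanity-check the result beyond the single (1, 0) case above, here is a minimal sketch (the `check_xor` helper is my addition, not part of the original script) that feeds all four XOR input pairs through the trained network:

def check_xor(mlp):
    # Hypothetical helper: print the prediction for every XOR input pair.
    # With relu and the all-positive [0, 1) weight init, training can
    # occasionally get stuck, so outputs may not always be close to 0/1.
    for X in (0, 1):
        for Y in (0, 1):
            print("XOR(%d, %d), expected %d:" % (X, Y, X ^ Y))
            mlp.predict(X, Y)

# usage, after mlp.training():
# check_xor(mlp)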
Honestly, I didn't really feel like writing posts anymore. But it has been a long time since my last update, and if a colleague of mine hadn't wanted to move into the machine-learning field, I probably wouldn't have thought to update at all.
We all know the perceptron is a very introductory machine-learning method, used mainly for binary classification. It is also a linear model; the theory was proposed back in 1957, and in 1969 it was pointed out that a single perceptron cannot solve the XOR problem, because XOR is not linearly separable.
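To make that claim concrete, here is a small brute-force sketch (my own illustration, not from the original post): no single threshold unit over a coarse grid of weights and biases reproduces XOR on all four inputs.

import itertools

def single_perceptron_can_fit_xor():
    # Brute-force a coarse grid of weights and biases; a single linear
    # threshold unit (w1*x + w2*y + b > 0) never matches XOR on all four
    # inputs, no matter which parameters it gets.
    grid = [i / 2.0 for i in range(-8, 9)]  # -4.0 .. 4.0 in steps of 0.5
    for w1, w2, b in itertools.product(grid, repeat=3):
        if all((1 if w1 * x + w2 * y + b > 0 else 0) == (x ^ y)
               for x in (0, 1) for y in (0, 1)):
            return True
    return False

print(single_perceptron_can_fit_xor())  # prints False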
While a single perceptron cannot solve XOR, a multilayer perceptron can; intuitively, it can draw multiple lines to separate the classes. I implemented it following the approach described in the XOR problem - homepage post.
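As a concrete illustration of the "multiple lines" idea, here is a 2-2-1 network with hand-picked weights (my own example, not the weights the script above learns) that computes XOR as AND(OR(x, y), NAND(x, y)): each hidden unit draws one line, and the output unit intersects the two half-planes.

import numpy as np

def step(z):
    return (z > 0).astype(float)

def xor_by_hand(x, y):
    inp = np.array([[x], [y]], dtype=float)
    # Hidden unit 1 ~ OR:   x + y - 0.5 > 0
    # Hidden unit 2 ~ NAND: -x - y + 1.5 > 0
    W1 = np.array([[1.0, 1.0], [-1.0, -1.0]])
    b1 = np.array([[-0.5], [1.5]])
    hidden = step(W1.dot(inp) + b1)
    # Output unit ~ AND of the two hidden units: h1 + h2 - 1.5 > 0
    W2 = np.array([[1.0, 1.0]])
    b2 = np.array([[-1.5]])
    return step(W2.dot(hidden) + b2)[0, 0]

for x in (0, 1):
    for y in (0, 1):
        print(x, y, int(xor_by_hand(x, y)))  # prints 0 0 0 / 0 1 1 / 1 0 1 / 1 1 0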
Finally, when I find the time, I will explain this interesting problem in more detail.