import numpy as np
from numpy import exp
from sklearn import datasets
def sigma(z):
    """Return the logistic sigmoid 1 / (1 + e**-z) of the scalar *z*.

    Two algebraically equivalent forms are used so that ``exp`` is only ever
    evaluated on a non-positive argument, which keeps it from overflowing
    for inputs of large magnitude.
    """
    if not (z >= 0):
        # Negative input: e**z is at most 1, so this form is safe.
        ez = exp(z)
        return ez / (1 + ez)
    # Non-negative input: e**-z is at most 1.
    return 1 / (1 + exp(-z))
class Net:
    """Single-hidden-layer sigmoid network trained by per-sample backpropagation.

    Thresholds are folded into the weight vectors: every input (and the
    hidden activation vector) is extended with a constant ``-1`` before the
    dot product, so the last weight of each vector acts as the threshold.

    Attributes:
        num: Number of hidden units.
        X: Training inputs, one sample per row.
        Y: Training targets, one per sample.
        B: Hidden activations of the most recent forward pass, shape (num,).
        wB: Hidden-to-output weights plus threshold, shape (num + 1,).
        WX: Input-to-hidden weights plus threshold, one row per hidden unit,
            shape (num, X.shape[1] + 1).
        g: Output-layer gradient term of the most recent ``cal_g`` call
            (``None`` until the first call).
        e: Hidden-layer gradient terms of the most recent ``cal_e``/``learn``
            step (``None`` until the first call).
    """

    def __init__(self, X, Y, num):
        """Store the training data and draw the initial weights.

        Args:
            X: 2-D array of training inputs (samples x features).
            Y: Sequence of targets, aligned with the rows of ``X``.
            num: Number of hidden-layer units.
        """
        self.num = num
        self.B = np.zeros(num)
        self.X = X
        self.Y = Y
        # Define the gradient caches up front so they always exist.
        self.g = None
        self.e = None
        # A private generator seeded with 2 produces the same weights as
        # seeding the global generator would, without resetting the
        # process-wide numpy random state as a side effect.
        rng = np.random.RandomState(2)
        # Hidden -> output weights and threshold.
        self.wB = rng.random_sample(num + 1)
        # Input -> hidden weights and thresholds, kept small initially.
        self.WX = rng.random_sample((num, X.shape[1] + 1)) * 0.01

    @staticmethod
    def _with_bias(values):
        """Return *values* extended with the constant -1 threshold input."""
        return np.append(values, -1)

    def _forward(self, Xi):
        """Run one forward pass for *Xi*, refresh ``self.B``, return the output."""
        x_aug = self._with_bias(Xi)
        for i in range(self.num):
            self.B[i] = sigma(np.dot(x_aug, self.WX[i]))
        return sigma(np.dot(self._with_bias(self.B), self.wB))

    def _hidden_terms(self, g):
        """Return the hidden-layer gradient terms for output term *g*.

        Uses the hidden activations currently stored in ``self.B``.
        """
        return [b * (1 - b) * g * w for b, w in zip(self.B, self.wB)]

    def cal_g(self, Xi, yi):
        """Compute the output-layer gradient term for one sample.

        Runs a forward pass (updating ``self.B``), stores the result in
        ``self.g`` and returns it.

        Args:
            Xi: One input sample.
            yi: Its target value.

        Returns:
            ``(yi - y) * y * (1 - y)`` where ``y`` is the network output.
        """
        y = self._forward(Xi)
        self.g = (yi - y) * y * (1 - y)
        return self.g

    def cal_e(self, Xi, yi):
        """Compute the hidden-layer gradient terms for one sample.

        The output term is always recomputed for the given sample, so the
        result never depends on a value cached from a different sample and
        the method can be called before ``cal_g``.

        Args:
            Xi: One input sample.
            yi: Its target value.

        Returns:
            List with one gradient term per hidden unit (also stored in
            ``self.e``).
        """
        g = self.cal_g(Xi, yi)
        self.e = self._hidden_terms(g)
        return self.e

    def learn(self, pace=0.1, max_num=300):
        """Train with per-sample weight updates.

        Args:
            pace: Learning rate.
            max_num: Number of passes over the training set.

        Returns:
            Always ``True``.
        """
        for _ in range(max_num):
            for Xi, yi in zip(self.X, self.Y):
                g = self.cal_g(Xi, yi)
                # Hidden terms must be derived from wB *before* it is updated.
                self.e = self._hidden_terms(g)
                # Update hidden -> output weights.
                self.wB += pace * g * self._with_bias(self.B)
                # Update input -> hidden weights, one row per hidden unit.
                step = (pace * np.asarray(self.e))[:, None]
                self.WX += step * self._with_bias(Xi)
        return True

    def __call__(self, Xi):
        """Classify *Xi*: return 1 if the network output is >= 0.5, else 0.

        Overwrites ``self.B`` with the hidden activations for *Xi*.
        """
        return 1 if self._forward(Xi) >= 0.5 else 0
def test():
    """Train a 2-hidden-unit Net on every boolean function of two inputs.

    For each target column, a fresh network is trained for 30000 passes and
    one line is printed showing the targets, the network's predictions and
    whether they all match.
    """
    def truth_column(n, width=4):
        """Return the lowest *width* bits of *n*, least-significant first."""
        return [(n >> k) & 1 for k in range(width)]

    X = np.array([(1, 1), (1, 0), (0, 0), (0, 1)])
    # Four input rows give 2**4 = 16 distinct target columns; range(16)
    # includes the all-ones column that range(15) would leave out.
    for Y in np.array([truth_column(n) for n in range(16)]):
        net = Net(X, Y, 2)
        net.learn(max_num=30000)
        a = np.array([net(x) for x in X])
        print(f"Y:{Y}\ta:{a}\t{all(a==Y)}")
# Called at module top level, so this runs whenever the file is executed or imported.
test()