import numpy as np
# Scratch/demo call whose result is discarded — presumably a leftover
# notebook cell; harmless but does no work for the program. TODO confirm.
np.outer([1,2,3,4],[5,6,7,8])
def tanh(x):
    """Hyperbolic-tangent activation, applied elementwise via NumPy."""
    activated = np.tanh(x)
    return activated
def softmax(x):
    """Numerically stable softmax over the whole array.

    Shifting by the maximum before exponentiating avoids overflow and
    leaves the result unchanged (the shift cancels in the ratio).
    """
    shifted = np.exp(x - x.max())
    total = shifted.sum()
    return shifted / total
def d_softmax(data):
    """Jacobian of softmax: diag(s) - s s^T, where s = softmax(data)."""
    s = softmax(data)
    jacobian = np.diag(s) - np.outer(s, s)
    return jacobian
def d_tanh(data):
    """Elementwise derivative of tanh: sech^2(x) = 1/cosh^2(x).

    Returned as a flat vector rather than a diagonal matrix — the
    original noted that building np.diag(...) here was slower.
    """
    c = np.cosh(data)
    return 1 / (c ** 2)
# Lookup table mapping each activation function to its derivative.
differential = {softmax:d_softmax,tanh:d_tanh}
dimensions = [28*28,10] # layer sizes: 784-pixel input -> 10 outputs (assumes w is a 2-D matrix)
activition = [tanh,softmax] # activation used at each layer; NOTE(review): "activition" is a typo of "activation", kept because later code references this name
import math
# Per-layer initialisation ranges. Layer 0 has only a bias; layer 1 has a
# bias plus a weight matrix drawn from the Xavier/Glorot uniform range
# [-sqrt(6/(n_in+n_out)), +sqrt(6/(n_in+n_out))].
# BUG FIX: the original used Unicode "smart quotes" (‘b’, ‘w’) for the dict
# keys, which is a SyntaxError in Python; replaced with ASCII quotes.
distribution = [
    {'b': [0, 0]},  # tunable
    {'b': [0, 0],   # tunable
     'w': [-math.sqrt(6 / (dimensions[0] + dimensions[1])),
           math.sqrt(6 / (dimensions[0] + dimensions[1]))]},
]  # theoretically w is best drawn from this interval
def init_parameters_b(layer):
    """Draw the bias vector for `layer` uniformly from its configured range.

    np.random.rand samples from [0, 1), so the sample is rescaled onto
    [dist[0], dist[1]) — the interval stored in distribution[layer]['b'].
    BUG FIX: the original 'b' key used Unicode smart quotes (SyntaxError).
    """
    dist = distribution[layer]['b']
    return np.random.rand(dimensions[layer]) * (dist[1] - dist[0]) + dist[0]
# Quick shape sanity checks on freshly drawn bias vectors; the values are
# discarded — presumably leftover notebook cells. TODO confirm.
init_parameters_b(1).shape
init_parameters_b(0).shape
def init_parameters_w(layer):
    """Draw the weight matrix for `layer` uniformly from its configured range.

    Shape is (dimensions[layer-1], dimensions[layer]) — previous layer size
    by this layer size, i.e. a 2-D matrix. np.random.rand samples [0, 1),
    rescaled onto the interval stored in distribution[layer]['w'].
    BUG FIX: the original 'w' key used Unicode smart quotes (SyntaxError).
    """
    dist = distribution[layer]['w']
    return np.random.rand(dimensions[layer - 1], dimensions[layer]) * (dist[1] - dist[0]) + dist[0]
# Quick inspection of a freshly drawn weight matrix and its shape; values
# are discarded — presumably leftover notebook cells. TODO confirm.
init_parameters_w(1)
init_parameters_w(1).shape
def init_parameters():
    """Build the full parameter list: one dict per layer holding the 'b'
    (and, where configured, 'w') arrays drawn by the init_* helpers.

    BUG FIX: every quoted literal in the original ('b', 'w') used Unicode
    smart quotes, a SyntaxError; replaced with ASCII quotes. The
    if/continue chain was also tidied into if/elif (same behavior).
    """
    parameter = []
    for i in range(len(distribution)):
        layer_parameter = {}
        for key in distribution[i]:
            if key == 'b':
                layer_parameter['b'] = init_parameters_b(i)
            elif key == 'w':
                layer_parameter['w'] = init_parameters_w(i)
        parameter.append(layer_parameter)
    return parameter
# Draw one concrete set of network parameters at module import time.
parameters = init_parameters()
def predict(img,parameters):
l0_in = img+parameters[0][‘b’]
l0_out = activition0