import numpy as np
# Classification with a small neural network.
# By the universal approximation theorem, a two-layer network (one hidden
# layer) with enough hidden units can approximate any continuous function
# on a compact domain.
# Nonlinear activation layers are what let the network fit more than just
# linear functions; three common activations are the step function,
# Sigmoid, and ReLU (this script uses ReLU).
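
# The three activations mentioned above, as a minimal reference sketch
# (only ReLU is actually used in the network below):
def step(x):
    return (x > 0).astype(x.dtype)      # step: 1 where x > 0, else 0

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))     # sigmoid: squashes inputs into (0, 1)

def relu(x):
    return np.maximum(0, x)             # ReLU: max(0, x)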
def affine_forward(x, w, b):
    """Affine (fully connected) forward pass: out = x_row @ w + b."""
    N = x.shape[0]
    x_row = x.reshape(N, -1)    # flatten each sample into a row vector, shape (N, D)
    # b has shape (1, M) while x_row @ w has shape (N, M); NumPy broadcasting
    # adds b to every row, which is why their dimensions need not match.
    out = np.dot(x_row, w) + b
    cache = (x, w, b)
    return out, cache
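
# Quick shape check for affine_forward (an illustrative sketch with arbitrary
# values; not part of the training script):
_x = np.random.randn(4, 3)
_w = np.random.randn(3, 5)
_b = np.zeros((1, 5))
_out, _ = affine_forward(_x, _w, _b)
assert _out.shape == (4, 5)   # b, shape (1, 5), is broadcast across all 4 rows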
def affine_backward(dout, cache):
    """Affine backward pass: gradients of the loss w.r.t. x, w, and b."""
    x, w, b = cache
    dx = np.dot(dout, w.T)                    # (N, M) @ (M, D) -> (N, D)
    dx = np.reshape(dx, x.shape)              # restore the original input shape
    x_row = x.reshape(x.shape[0], -1)
    dw = np.dot(x_row.T, dout)                # (D, N) @ (N, M) -> (D, M)
    db = np.sum(dout, axis=0, keepdims=True)  # sum over the batch, shape (1, M)
    return dx, dw, db
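
# Optional sanity check: compare affine_backward's analytic dw against a
# centered finite difference (a sketch; _num_grad and its step size h are
# hypothetical helpers, not part of the original network):
def _num_grad(f, p, h=1e-5):
    g = np.zeros_like(p)
    it = np.nditer(p, flags=['multi_index'])
    while not it.finished:
        i = it.multi_index
        old = p[i]
        p[i] = old + h; fp = f()
        p[i] = old - h; fm = f()
        p[i] = old
        g[i] = (fp - fm) / (2.0 * h)
        it.iternext()
    return g

_x, _w, _b = np.random.randn(4, 3), np.random.randn(3, 5), np.random.randn(1, 5)
_dout = np.random.randn(4, 5)
_out, _cache = affine_forward(_x, _w, _b)
_dx, _dw, _db = affine_backward(_dout, _cache)
_dw_num = _num_grad(lambda: np.sum(affine_forward(_x, _w, _b)[0] * _dout), _w)
assert np.allclose(_dw, _dw_num, atol=1e-4)   # analytic and numeric gradients agree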
# Four training points, one per quadrant; label k corresponds to quadrant k+1.
X = np.array([[ 2,  1],    # quadrant 1 -> label 0
              [-1,  1],    # quadrant 2 -> label 1
              [-1, -1],    # quadrant 3 -> label 2
              [ 1, -1]])   # quadrant 4 -> label 3
t = np.array([0, 1, 2, 3])
np.random.seed(1)
input_dim = X.shape[1]           # 2
num_classes = int(t.max()) + 1   # 4; count classes, not samples (t.shape[0] matched only by coincidence)
hidden_dim = 100
reg = 0.001                      # L2 regularization strength
epsilon = 0.001                  # learning rate for gradient descent; a tunable hyperparameter
W1 = np.random.randn(input_dim, hidden_dim)    # 2 x 100
W2 = np.random.randn(hidden_dim, num_classes)  # 100 x 4
b1 = np.zeros((1, hidden_dim))
b2 = np.zeros((1, num_classes))
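
# Aside: plain randn initialization is adequate for this toy problem; for
# deeper ReLU networks a scaled scheme such as He initialization is common.
# Shown only as a hypothetical alternative, not applied here:
#   W1 = np.random.randn(input_dim, hidden_dim) * np.sqrt(2.0 / input_dim)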
for j in range(10000):
    # Forward pass: affine -> ReLU -> affine -> softmax.
    H, fc_cache = affine_forward(X, W1, b1)
    H = np.maximum(0, H)    # ReLU activation
    relu_cache = H
    Y, cachey = affine_forward(H, W2, b2)
    # Softmax; subtracting the row max first keeps np.exp from overflowing
    # without changing the result.
    probs = np.exp(Y - np.max(Y, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = Y.shape[0]
    # Mean cross-entropy loss, plus the L2 penalty that matches the
    # reg*W gradient terms added below.
    loss = -np.sum(np.log(probs[np.arange(N), t])) / N
    loss += 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
    # Backward pass. For softmax + cross-entropy, dL/dY = (probs - one_hot(t)) / N.
    dx = probs.copy()
    dx[np.arange(N), t] -= 1
    dx /= N
    dh1, dW2, db2 = affine_backward(dx, cachey)
    dh1[relu_cache <= 0] = 0    # ReLU gradient: zero wherever the forward output was 0
    dX, dW1, db1 = affine_backward(dh1, fc_cache)
    # L2 regularization penalty: 0.5*reg*||W||^2 discourages overly large
    # weights; its gradient contributes reg*W (biases are not regularized).
    dW2 += reg * W2
    dW1 += reg * W1
    # Vanilla gradient descent update.
    W2 += -epsilon * dW2
    b2 += -epsilon * db2
    W1 += -epsilon * dW1
    b1 += -epsilon * db1
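
# A quick post-training check on the training points themselves (a sketch;
# the original flow proceeds directly to the held-out test points below):
_H = np.maximum(0, affine_forward(X, W1, b1)[0])
_scores = affine_forward(_H, W2, b2)[0]
print("training accuracy:", np.mean(np.argmax(_scores, axis=1) == t))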
# Evaluate on four held-out points, one in each quadrant.
test = np.array([[22, 33], [-100, 20], [-72, -5], [3, -20]])
H, fc_cache = affine_forward(test, W1, b1)
H = np.maximum(0, H)    # ReLU
Y, cachey = affine_forward(H, W2, b2)
probs = np.exp(Y - np.max(Y, axis=1, keepdims=True))
probs /= np.sum(probs, axis=1, keepdims=True)
print(probs)
for k in range(4):
    print(test[k, :], "is predicted to lie in quadrant", np.argmax(probs[k, :]) + 1)
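
# Why both softmax sites above subtract the row max before exponentiating:
# softmax is invariant to adding a constant within each row, and shifting by
# the max keeps np.exp from overflowing. A standalone illustration (a sketch):
_z = np.array([[1000.0, 1001.0]])   # np.exp(_z) alone would overflow to inf
_p = np.exp(_z - np.max(_z, axis=1, keepdims=True))
_p /= np.sum(_p, axis=1, keepdims=True)
print("stable softmax of [1000, 1001]:", _p)   # ~[[0.2689, 0.7311]]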