# Define the necessary helper functions
import numpy as np
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    assert A.shape == Z.shape
    cache = Z   # cache the pre-activation for the backward pass
    return A, cache
def relu(Z):
    A = np.maximum(0, Z)   # copy; A = Z followed by A[Z <= 0] = 0 would mutate Z in place
    assert A.shape == Z.shape
    cache = Z
    return A, cache
def backward_sigmoid(dA, cache):
    Z = cache
    A = 1 / (1 + np.exp(-Z))   # recompute the activation from the cached Z
    dZ = dA * A * (1 - A)      # sigmoid'(Z) = A * (1 - A)
    assert dZ.shape == dA.shape
    return dZ
def backward_relu(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)   # copy so the caller's dA is not mutated in place
    dZ[Z <= 0] = 0                 # relu'(Z) is 0 where Z <= 0, 1 elsewhere
    assert dZ.shape == dA.shape
    return dZ
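# A quick finite-difference check of backward_sigmoid (a sketch; Z0, eps and the
# unit upstream gradient are assumptions, not part of the original code):
Z0 = np.array([[0.5]])
eps = 1e-7
numeric = (sigmoid(Z0 + eps)[0] - sigmoid(Z0 - eps)[0]) / (2 * eps)
analytic = backward_sigmoid(np.ones((1, 1)), Z0)   # dA = 1, so dZ = sigmoid'(Z0)
assert np.allclose(numeric, analytic)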
def initial_parameters(layerdims):
    np.random.seed(3)
    L = len(layerdims)
    parameters = {}
    for l in range(1, L):   # note range(1, L); this was mistakenly written as L + 1 before
        parameters["W" + str(l)] = np.random.randn(layerdims[l], layerdims[l - 1]) * 0.01
        parameters["b" + str(l)] = np.zeros((layerdims[l], 1))   # layerdims[l], not layerdims[1]
        assert parameters["W" + str(l)].shape == (layerdims[l], layerdims[l - 1])
        assert parameters["b" + str(l)].shape == (layerdims[l], 1)
    return parameters
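# Example usage (a sketch; the 2-4-1 layer sizes are an assumption): W1 comes out
# (4, 2), b1 (4, 1), W2 (1, 4), b2 (1, 1).
demo_params = initial_parameters([2, 4, 1])
print(demo_params["W1"].shape, demo_params["b2"].shape)   # (4, 2) (1, 1)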
def line(A_pre, W, b):
    Z = np.dot(W, A_pre) + b
    assert Z.shape == (W.shape[0], A_pre.shape[1])
    cache = (A_pre, W, b)   # a list [A_pre, W, b] would work the same as a tuple here
    return Z, cache
def cost(Y, AL):
    m = Y.shape[1]
    # cross-entropy over all examples; the np.sum was missing in an earlier version
    cost = -np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))) / m
    cost = np.squeeze(cost)   # reduce to a plain scalar; easy to forget
    assert cost.shape == ()
    return cost
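# A small sanity check of cost (a sketch; Y_demo and AL_demo are made-up values):
# mostly-correct predictions should give a small positive scalar.
Y_demo = np.array([[1, 0, 1]])
AL_demo = np.array([[0.9, 0.2, 0.7]])
print(cost(Y_demo, AL_demo))   # about 0.228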
def forward_propagate(X, parameters):
    # // is needed so L is an int: parameters always come in (W, b) pairs,
    # but len(parameters) / 2 would give a float
    L = len(parameters) // 2
    caches = []
    A_pre = X
    for l in range(1, L):   # relu for layers 1 .. L-1
        Z, cache_line = line(A_pre, parameters["W" + str(l)], parameters["b" + str(l)])
        A, cache_active = relu(Z)
        A_pre = A
        caches.append([cache_line, cache_active])
    # sigmoid for the output layer L
    ZL, cache_line = line(A_pre, parameters["W" + str(L)], parameters["b" + str(L)])
    AL, cache_active = sigmoid(ZL)
    caches.append([cache_line, cache_active])
    return AL, caches
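# A forward-pass smoke test (a sketch; the sizes and the random X are assumptions):
np.random.seed(1)
X_demo = np.random.randn(2, 3)                        # 2 features, 3 examples
AL_out, caches_out = forward_propagate(X_demo, initial_parameters([2, 4, 1]))
assert AL_out.shape == (1, 3)                         # one sigmoid output per example
assert len(caches_out) == 2                           # one cache per layer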
def backward_propagate(Y, AL, caches):
    L = len(caches)   # one cache per layer, so no //2 here (unlike len(parameters))
    grads = {}
    m = Y.shape[1]
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)
    assert dAL.shape == AL.shape
    grads["dA" + str(L)] = dAL
    # layer L (sigmoid) is handled separately, then the relu layers below it
    cache_line_L, cache_active_L = caches[L - 1]
    dZ = backward_sigmoid(dAL, cache_active_L)
    grads["dW" + str(L)] = np.dot(dZ, cache_line_L[0].T) / m
    grads["db" + str(L)] = np.sum(dZ, axis=1, keepdims=True) / m
    grads["dA" + str(L - 1)] = np.dot(cache_line_L[1].T, dZ)
    for l in reversed(range(L - 1)):
        cache_line, cache_active = caches[l]
        A_pre, W, b = cache_line
        Z = cache_active
        # the earlier bug was here: dZ must use layer l's Z, not layer l+1's
        dZ = backward_relu(grads["dA" + str(l + 1)], Z)
        grads["dW" + str(l + 1)] = np.dot(dZ, A_pre.T) / m   # this line was missing
        grads["db" + str(l + 1)] = np.sum(dZ, axis=1, keepdims=True) / m
        grads["dA" + str(l)] = np.dot(W.T, dZ)
    return grads   # cost is no longer computed or returned here
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    return parameters
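# Putting the pieces together: a minimal gradient-descent loop (a sketch; the
# function name and the default hyperparameters are assumptions, not from the original):
def demo_train(X, Y, layerdims, learning_rate=0.0075, num_iterations=1000):
    parameters = initial_parameters(layerdims)
    for i in range(num_iterations):
        AL, caches = forward_propagate(X, parameters)
        if i % 100 == 0:
            print("iteration", i, "cost", cost(Y, AL))
        grads = backward_propagate(Y, AL, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
    return parameters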