"""Build your first neural network, which will have one hidden layer.

- Implement a 2-class classification neural network with a single hidden layer
- Use units with a non-linear activation function, such as tanh
- Compute the cross-entropy loss
- Implement forward and backward propagation
"""
import numpy as np
import matplotlib.pyplot as plt
# testCases provides test examples used to check the correctness of each function.
from testCases import *
# sklearn provides simple, efficient tools for data mining and analysis.
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets

np.random.seed(1)  # set a seed so that the results are consistent

# ---------------------------------------------------------------------------
# Load and visualise the dataset.
# X has shape (2, m): two features per column; Y has shape (1, m): 0/1 labels.
# ---------------------------------------------------------------------------
X, Y = load_planar_dataset()

# Scatter plot: c gives the per-point colour (the label), cmap the palette.
plt.scatter(X[0, :], X[1, :], c=Y.reshape(X[0, :].shape), s=40, cmap=plt.cm.Spectral)
plt.show()

X_shape = X.shape
Y_shape = Y.shape
m = X.shape[1]  # number of training examples

# ---------------------------------------------------------------------------
# Baseline: train a logistic-regression classifier on the dataset.
# ---------------------------------------------------------------------------
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X.T, Y.T)  # fit the model to the data

# Plot the decision boundary of this model.
plot_decision_boundary(lambda x: clf.predict(x), X, Y)
plt.title("Logistic Regression")
plt.show()

LR_predictions = clf.predict(X.T)

# ---------------------------------------------------------------------------
# Neural network with a single hidden layer.
#
# General methodology:
#   1. Define the network structure (number of input units, hidden units, ...)
#   2. Initialise the model's parameters
#   3. Loop:
#        - forward propagation
#        - compute the loss
#        - backward propagation to obtain the gradients
#        - update the parameters (gradient descent)
# Helper functions implement steps 1-3 and are combined in nn_model().
# ---------------------------------------------------------------------------


def layer_size(X, Y):
    """Return the layer sizes of the network.

    Parameters
    ----------
    X : array of shape (n_x, m) -- input dataset.
    Y : array of shape (n_y, m) -- labels.

    Returns
    -------
    (n_x, n_y, n_h) : sizes of the input, output and hidden layers.
        The hidden layer size is hard-coded to 4 for this exercise.
    """
    n_x = X.shape[0]  # dimensionality of a single training example
    n_y = Y.shape[0]
    n_h = 4
    return n_x, n_y, n_h


X_assess, Y_assess = layer_sizes_test_case()
n_x, n_y, n_h = layer_size(X_assess, Y_assess)


def initialize_parameters(n_x, n_y, n_h):
    """Initialise the model's parameters.

    Weights are drawn from a small random normal (scaled by 0.01 to keep
    tanh activations in their near-linear region); biases start at zero.

    Returns a dict with keys "W1" (n_h, n_x), "b1" (n_h, 1),
    "W2" (n_y, n_h) and "b2" (n_y, 1).
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


n_x, n_h, n_y = initialize_parameters_test_case()
parameters = initialize_parameters(n_x, n_y, n_h)


def forward_propagation(X_assess, parameters):
    """Run forward propagation: linear -> tanh -> linear -> sigmoid.

    Parameters
    ----------
    X_assess : array of shape (n_x, m) -- input data.
    parameters : dict with keys "W1", "b1", "W2", "b2".

    Returns
    -------
    A2 : array of shape (n_y, m) -- sigmoid output of the second layer.
    cache : dict holding "Z1", "A1", "Z2", "A2" for backpropagation.
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X_assess) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return A2, cache


X_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(X_assess, parameters)


def compute_cost(Y, A2):
    """Compute the cross-entropy cost averaged over the m examples.

    cost = -(1/m) * sum( Y*log(A2) + (1-Y)*log(1-A2) )

    NOTE(review): no clipping is applied, so A2 exactly 0 or 1 would
    produce log(0); acceptable here because the sigmoid output is open
    in (0, 1).
    """
    m = Y.shape[1]
    cost = -1 / m * np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    # np.squeeze removes size-1 dimensions so the cost is a plain scalar.
    cost = np.squeeze(cost)
    return cost


A2, Y_assess, parameters = compute_cost_test_case()


def backward_propagation(X_assess, Y_assess, parameters, cache):
    """Compute the gradients of the cost w.r.t. all parameters.

    Uses the cached forward values; the tanh derivative is 1 - A1**2.

    Returns a dict with keys "dW1", "db1", "dW2", "db2".
    """
    m = Y_assess.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    Z1 = cache["Z1"]
    A1 = cache["A1"]
    Z2 = cache["Z2"]
    A2 = cache["A2"]

    dZ2 = A2 - Y_assess
    dW2 = 1 / m * np.dot(dZ2, A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    dg = 1 - np.power(A1, 2)          # derivative of tanh at A1
    dZ1 = np.dot(W2.T, dZ2) * dg      # element-wise product, shape (n_h, m)
    dW1 = 1 / m * np.dot(dZ1, X_assess.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads


parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
grads = backward_propagation(X_assess, Y_assess, parameters, cache)


def update_parameters(grads, parameters, learning_rate):
    """Apply one gradient-descent step: theta = theta - learning_rate * dtheta.

    Returns a new parameters dict with updated "W1", "b1", "W2", "b2".
    """
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


parameters, grads = update_parameters_test_case()
parameters = update_parameters(grads, parameters, learning_rate=1.2)


def nn_model(X_assess, Y_assess, num_iterations=10000, print_cost=False):
    """Train the network with the fixed hidden-layer size from layer_size().

    Loops num_iterations times over forward propagation, cost computation,
    backward propagation and a gradient-descent update (learning rate 1.2).
    Returns the trained parameters dict.
    """
    n_x, n_y, n_h = layer_size(X_assess, Y_assess)
    parameters = initialize_parameters(n_x, n_y, n_h)
    for i in range(num_iterations):
        A2, cache = forward_propagation(X_assess, parameters)
        cost = compute_cost(Y_assess, A2)
        grads = backward_propagation(X_assess, Y_assess, parameters, cache)
        parameters = update_parameters(grads, parameters, learning_rate=1.2)
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters


def nn_model_example(X_assess, Y_assess, n_h, num_iterations=10000, print_cost=False):
    """Same training loop as nn_model(), but with a caller-chosen hidden size n_h."""
    n_x = layer_size(X_assess, Y_assess)[0]
    n_y = layer_size(X_assess, Y_assess)[1]
    parameters = initialize_parameters(n_x, n_y, n_h)
    for i in range(num_iterations):
        A2, cache = forward_propagation(X_assess, parameters)
        cost = compute_cost(Y_assess, A2)
        grads = backward_propagation(X_assess, Y_assess, parameters, cache)
        parameters = update_parameters(grads, parameters, learning_rate=1.2)
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters


X_assess, Y_assess = nn_model_test_case()
parameters = nn_model(X_assess, Y_assess, num_iterations=10000, print_cost=False)


def predict(parameters, X_assess):
    """Predict 0/1 labels by thresholding the network output at 0.5.

    np.round maps A2 values >= 0.5 to 1 and < 0.5 to 0.
    """
    A2, cache = forward_propagation(X_assess, parameters)
    predictions = np.round(A2)
    return predictions


parameters, X_assess = predict_test_case()
predictions = predict(parameters, X_assess)
print("predictions mean = " + str(np.mean(predictions)))

# ---------------------------------------------------------------------------
# Train the final model on the planar dataset and report accuracy.
# ---------------------------------------------------------------------------
parameters = nn_model(X, Y, num_iterations=10000, print_cost=True)

# Plot the decision boundary
plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
plt.title("Decision Boundary for hidden layer size " + str(4))
plt.show()

# Print accuracy: Y.predictions counts correct 1s, (1-Y).(1-predictions) correct 0s.
predictions = predict(parameters, X)
print('Accuracy: %d' % float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100) + '%')

# This may take about 2 minutes to run
plt.figure(figsize=(16, 32))
hidden_layer_sizes = [1, 2, 3, 4, 5, 10, 20]
# enumerate yields (index, value) pairs so each size gets its own subplot.
for i, n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5, 2, i + 1)
    plt.title('Hidden Layer of size %d' % n_h)
    parameters = nn_model_example(X, Y, n_h, num_iterations=5000)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    predictions = predict(parameters, X)
    accuracy = float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100)
    print("Accuracy for {} hidden units: {} %".format(n_h, accuracy))
plt.show()

# Larger models (with more hidden units) fit the training set better, until
# the largest models eventually overfit the data.
# The best hidden-layer size appears to be around n_h = 5: it fits the data
# well without noticeable overfitting.
# Regularisation (covered later) lets you build much larger models
# (e.g. n_h = 50) without overfitting.