一、问题描述(单隐藏层的平面分类问题)
相关资料下载:Planar data classification
在Planar data数据集中分布着众多散点,X保存着每个散点的坐标,Y保存着每个散点对应的标签(0:红色,1:蓝色),要求用单隐层神经网络对该平面图形进行分类。
二、具体实现
引入机器学习库和相关工具
import numpy as np # 科学计算包
import matplotlib.pyplot as plt # 绘图包
import sklearn # 提供数据挖掘和分析的工具
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets # 加载数据集和一些必要的工具
加载数据集并可视化散点
# Load the dataset: X holds the point coordinates, Y the labels (0 = red, 1 = blue).
X, Y = load_planar_dataset()

# Scatter-plot the points, coloured by their label.
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)
plt.title("scatter pic")

# Report the array shapes and the sample count (second dimension of Y).
shape_X, shape_Y = X.shape, Y.shape
m = shape_Y[1]
print(f"X的维度: {shape_X}")
print(f"Y的维度: {shape_Y}")
print(f"样本数量: {m}")
逻辑回归验证正确率
LogisticRegressionCV使用交叉验证选择正则化系数C,而LogisticRegression每次指定正则化系数
# Baseline: check how well plain logistic regression does on this data.
# LogisticRegressionCV selects the regularisation coefficient C by
# cross-validation, whereas LogisticRegression uses the C you specify.
clf = sklearn.linear_model.LogisticRegressionCV()
# scikit-learn expects the targets as a 1-D vector of length n_samples.
# Y.T is a column vector, which triggers a DataConversionWarning, so the
# labels are flattened before fitting.
clf.fit(X.T, np.ravel(Y))
绘制图像边界 计算准确度
计算准确度时,分别统计预测值与标签同为 1(蓝色)和同为 0(红色)的样本数,两者之和即为分类正确的样本数:
np.dot(Y, LR_predictions) + np.dot(1 - Y, 1 - LR_predictions)
# Draw the decision boundary learned by logistic regression.
plot_decision_boundary(lambda x: clf.predict(x), X, np.squeeze(Y))
plt.title("Logistic Regression")

# Accuracy = (points where label and prediction are both 1
#             + points where both are 0) / total number of points.
LR_predictions = clf.predict(X.T)
both_one = np.dot(Y, LR_predictions)            # label == prediction == 1
both_zero = np.dot(1 - Y, 1 - LR_predictions)   # label == prediction == 0
# Squeeze to a 0-d array before float(): converting an array with ndim > 0
# to a Python scalar is deprecated since NumPy 1.25.
accuracy = float(np.squeeze((both_one + both_zero) / float(Y.size) * 100))
print("逻辑回归的准确度: %d" % accuracy + "% " + "(正确标记的数据点所占的百分比)")
print("同为1的数量: " , both_one)
print("同为0的数量: " , both_zero)
定义神经网络结构
# 定义神经网络结构
def layer_size(X, Y, n_h=4):
    """Return the layer sizes ``(n_x, n_h, n_y)`` of the network.

    Args:
        X: Input array; its first dimension is used as the input layer size.
        Y: Label array; its first dimension is used as the output layer size.
        n_h: Hidden layer size. Defaults to 4, the value that used to be
            hard-coded, so existing two-argument calls behave as before.

    Returns:
        The tuple ``(n_x, n_h, n_y)``.
    """
    n_x = X.shape[0]
    n_y = Y.shape[0]
    return (n_x, n_h, n_y)
参数的随机初始化
# 参数的随机初始化
def initialization(n_x, n_h, n_y, seed=None):
    """Randomly initialise the parameters of the network.

    Args:
        n_x: Size of the input layer.
        n_h: Size of the hidden layer.
        n_y: Size of the output layer.
        seed: Optional seed. When given, NumPy's global random generator is
            re-seeded before drawing so the result is reproducible. With the
            default ``None`` the generator state is left as it is.

    Returns:
        A dict with keys ``"W1"`` (n_h, n_x), ``"b1"`` (n_h, 1),
        ``"W2"`` (n_y, n_h) and ``"b2"`` (n_y, 1).
    """
    if seed is not None:
        np.random.seed(seed)

    # Weights are standard-normal draws scaled by 0.01; biases are unscaled
    # standard-normal draws. The draw order (W1, b1, W2, b2) is kept so a
    # seeded run reproduces the same values.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.random.randn(n_h, 1)
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.random.randn(n_y, 1)

    return {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
    }
前向传播计算结果
# 前向传播计算结果
def forward_propagation(X, parameters):
    """Run a forward pass through the two-layer network.

    Computes ``Z1 = W1·X + b1``, ``A1 = tanh(Z1)``, ``Z2 = W2·A1 + b2`` and
    ``A2 = sigmoid(Z2)``.

    Args:
        X: Input array.
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        A tuple ``(A2, cache)`` where ``cache`` is a dict with the
        intermediate results under ``"Z1"``, ``"A1"``, ``"Z2"`` and ``"A2"``.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine map followed by tanh.
    hidden_linear = np.dot(W1, X) + b1
    hidden_activation = np.tanh(hidden_linear)

    # Output layer: affine map followed by sigmoid.
    output_linear = np.dot(W2, hidden_activation) + b2
    output_activation = sigmoid(output_linear)

    # Keep the intermediate values so back-propagation can reuse them.
    cache = dict(
        Z1=hidden_linear,
        A1=hidden_activation,
        Z2=output_linear,
        A2=output_activation,
    )
    return (output_activation, cache)
计算成本函数
# 计算成本函数
def cost_function(A2, Y, parameters):
    """Compute the mean cross-entropy cost of the predictions.

    Args:
        A2: Predicted probabilities, same shape as ``Y``.
        Y: True labels (0 or 1); the sample count is ``Y.shape[1]``.
        parameters: Unused; kept so existing callers keep working.

    Returns:
        The cost as a Python ``float``.
    """
    m = Y.shape[1]

    # A saturated output makes A2 exactly 0.0 or 1.0; log(0) is -inf and
    # 0 * -inf is NaN, which would poison the cost. Clip the probabilities
    # just inside the open interval (0, 1) to keep the result finite.
    eps = 1e-15
    probs = np.clip(A2, eps, 1 - eps)

    logprobs = np.multiply(np.log(probs), Y) + np.multiply(1 - Y, np.log(1 - probs))
    cost = -np.sum(logprobs) / m
    return float(np.squeeze(cost))
反向传播计算梯度
# 反向传播计算梯度
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost with respect to the parameters.

    Args:
        parameters: Dict of network parameters; only ``"W2"`` is read here.
        cache: Dict of forward-pass results; ``"A1"`` and ``"A2"`` are read.
        X: Input array used in the forward pass.
        Y: True labels; the sample count is ``Y.shape[1]``.

    Returns:
        A dict with the gradients ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``.
    """
    m = Y.shape[1]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # Output layer error and its gradients, averaged over the m samples.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Push the error back through W2, then multiply element-wise by
    # 1 - A1**2, which is the derivative of tanh expressed via its output.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    return {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2,
    }
更新参数
# 更新参数
def update_para(parameters, grads, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    Args:
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.
        grads: Dict holding ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``.
        learning_rate: Step size multiplied into each gradient.

    Returns:
        A new dict with the updated ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``;
        the input dict is not modified.
    """
    updated = {}
    for key in ("W1", "b1", "W2", "b2"):
        # Each parameter moves against its gradient, stored under "d" + key.
        updated[key] = parameters[key] - learning_rate * grads["d" + key]
    return updated
整合函数到神经网络模型中
# 整合函数到神经网络模型中
def nn_model(X, Y, n_h, num_iterations, print_cost=False, learning_rate=0.5):
    """Train the two-layer network with gradient descent.

    Args:
        X: Input array.
        Y: True labels.
        n_h: Hidden layer size.
        num_iterations: Number of gradient-descent iterations.
        print_cost: When true, print the cost every 1000 iterations.
        learning_rate: Gradient-descent step size. Defaults to 0.5, the
            value that used to be hard-coded.

    Returns:
        The dict of learned parameters after the last iteration.
    """
    # Only the input and output sizes come from the data; the hidden size is
    # the n_h argument, so the middle element of layer_size() is ignored.
    n_x, _, n_y = layer_size(X, Y)
    parameters = initialization(n_x, n_h, n_y)

    for i in range(num_iterations):
        # Forward pass: predictions plus the cached intermediate values.
        A2, cache = forward_propagation(X, parameters)

        # The cost is only needed for reporting, so compute it on demand.
        if print_cost and i % 1000 == 0:
            cost = cost_function(A2, Y, parameters)
            print("iter " + str(i) + "cost: " + str(cost))

        # Backward pass and gradient-descent update.
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_para(parameters, grads, learning_rate=learning_rate)

    return parameters
通过前向传播预测结果
# 通过前向传播预测结果
def predict(parameters, X):
    """Predict labels for ``X`` with a forward pass.

    Args:
        parameters: Dict of network parameters passed to the forward pass.
        X: Input array.

    Returns:
        The output activations rounded with ``np.round``, so the plotted
        labels are whole numbers rather than fractional values.
    """
    probabilities, _ = forward_propagation(X, parameters)
    return np.round(probabilities)
运行神经网络
# Train the network and draw its decision boundary.
n_h = 5
parameters = nn_model(X, Y, n_h, num_iterations=25000, print_cost=True)
plot_decision_boundary(lambda x: predict(parameters, x.T), X, np.squeeze(Y))
plt.title("Decision Boundary for hidden layer size " + str(n_h))
prediction = predict(parameters, X)
# Accuracy is still matched points / total points, where the matched points
# are counted separately for "both 1" and "both 0".
matched = np.dot(Y, prediction.T) + np.dot(1 - Y, 1 - prediction.T)
# Squeeze to a 0-d array before float(): converting an array with ndim > 0
# to a Python scalar is deprecated since NumPy 1.25.
print('准确率: %d' % float(np.squeeze(matched / float(Y.size) * 100)) + '%')
训练结果
iter 0cost: 0.708071085308046
iter 1000cost: 0.30741440010693716
iter 2000cost: 0.26601981480091225
iter 3000cost: 0.25139451348937925
iter 4000cost: 0.2298323066007118
iter 5000cost: 0.2234268984966187
iter 6000cost: 0.21873641221070306
iter 7000cost: 0.21388024684888593
iter 8000cost: 0.19533195452554392
iter 9000cost: 0.1892464095610981
iter 10000cost: 0.18496394205003877
iter 11000cost: 0.18196909243120452
iter 12000cost: 0.17972243505532348
iter 13000cost: 0.17794110287197373
iter 14000cost: 0.17647612528102935
iter 15000cost: 0.17524134831706156
iter 16000cost: 0.17418234038607008
iter 17000cost: 0.17326220087879385
iter 18000cost: 0.1724545312969843
iter 19000cost: 0.17173966168538932
iter 20000cost: 0.1711024741907582
iter 21000cost: 0.17053106797985465
iter 22000cost: 0.17001589646671766
iter 23000cost: 0.16954918490791848
iter 24000cost: 0.169124522707884
准确率: 92%
准确率为 92%,对于该分类问题来说已经比较高了。
更改数据集再分类
# Load the additional datasets and pick one of them by name.
noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()
datasets = {"noisy_circles": noisy_circles,
            "noisy_moons": noisy_moons,
            "blobs": blobs,
            "gaussian_quantiles": gaussian_quantiles}
dataset = "noisy_moons"
X, Y = datasets[dataset]
# Transpose X and turn Y into a single row, matching the layout used for the
# planar dataset earlier in this file.
X, Y = X.T, Y.reshape(1, Y.shape[0])
if dataset == "blobs":
    # Reduce the blob labels modulo 2 so only the labels 0 and 1 remain.
    Y = Y % 2
# The original drew this scatter plot twice with two identical statements;
# one call is enough.
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)

# Train the network on the selected dataset and draw its decision boundary.
n_h = 5
parameters = nn_model(X, Y, n_h, num_iterations=25000, print_cost=True)
plot_decision_boundary(lambda x: predict(parameters, x.T), X, np.squeeze(Y))
plt.title("Decision Boundary for hidden layer size " + str(n_h))
prediction = predict(parameters, X)
# Accuracy is still matched points / total points, where the matched points
# are counted separately for "both 1" and "both 0".
matched = np.dot(Y, prediction.T) + np.dot(1 - Y, 1 - prediction.T)
# Squeeze to a 0-d array before float(): converting an array with ndim > 0
# to a Python scalar is deprecated since NumPy 1.25.
print('准确率: %d' % float(np.squeeze(matched / float(Y.size) * 100)) + '%')
iter 0cost: 0.6953767909586918
iter 1000cost: 0.2938169576936265
iter 2000cost: 0.07705498285251608
iter 3000cost: 0.062488993936143426
iter 4000cost: 0.058931589658356966
iter 5000cost: 0.056946048269672665
iter 6000cost: 0.055645528178731835
iter 7000cost: 0.054698321167242055
iter 8000cost: 0.05395184439329581
iter 9000cost: 0.05333117668530852
iter 10000cost: 0.052796360926562376
iter 11000cost: 0.05232364189395759
iter 12000cost: 0.05189625128163916
iter 13000cost: 0.05149926690336055
iter 14000cost: 0.051120974867204225
iter 15000cost: 0.05076631084235808
iter 16000cost: 0.05044577096739575
iter 17000cost: 0.05015438308266772
iter 18000cost: 0.04988493864244256
iter 19000cost: 0.049632881965477035
iter 20000cost: 0.04939438719967626
iter 21000cost: 0.04916468184485608
iter 22000cost: 0.04893603383575437
iter 23000cost: 0.04869585695442612
iter 24000cost: 0.04843254504777489
准确率: 98%