Problem:
Given an input image, decide whether it shows a cat; output y=1 for a cat and y=0 otherwise.
Dataset:
- train_catvnoncat.h5: the training set, with shape (209, 64, 64, 3), i.e. 209 color images of size 64*64
- test_catvnoncat.h5: the test set, with shape (50, 64, 64, 3), i.e. 50 color images of size 64*64
Coding:
- Loading the dataset:
An h5 file works much like a dictionary: each value is retrieved through its key.
train_catvnoncat.h5 has three keys: list_classes (the class names), train_set_x (the images), and train_set_y (the labels).
The only preprocessing the images need is flattening each (64, 64, 3) image into a one-dimensional vector, which serves as the model input.
# Load and preprocess the data
def load_data():
    # Open the raw training and test files (read-only)
    train_data_org = h5py.File('train_catvnoncat.h5', 'r')
    test_data_org = h5py.File('test_catvnoncat.h5', 'r')
    # An h5 file behaves like a dictionary: values are accessed by key
    for key in train_data_org.keys():
        print(key)
    '''
    Output:
    list_classes
    train_set_x
    train_set_y
    '''
    # Training data
    train_x_org = train_data_org["train_set_x"]
    train_y_org = train_data_org["train_set_y"]
    # Test data
    test_x_org = test_data_org["test_set_x"]
    test_y_org = test_data_org["test_set_y"]
    # Dimensions and sample counts
    # number of features per image (64*64*3 = 12288)
    n = np.shape(train_x_org)[1]*np.shape(train_x_org)[2]*np.shape(train_x_org)[3]
    # number of training samples
    m_train = np.shape(train_x_org)[0]
    # number of test samples
    m_test = np.shape(test_x_org)[0]
    # Reshape the labels into row vectors of shape (1, m)
    train_y = np.array(train_y_org).reshape(1,m_train)
    test_y = np.array(test_y_org).reshape(1,m_test)
    # The images are stored as (m, 64, 64, 3); flatten them into a (12288, m) array
    train_x = np.array(train_x_org).reshape(m_train,-1).T   # shape (n, m_train)
    test_x = np.array(test_x_org).reshape(m_test, -1).T     # shape (n, m_test)
    return train_x_org,test_x_org,train_x,test_x,train_y,test_y
- Defining the activation functions and their derivatives
def sigmoid(z):
    return 1/(1+np.exp(-z))

def linear(z):
    return z

def relu(z):
    return np.maximum(0,z)

# Derivative of sigmoid
def back_sigmoid(z):
    return sigmoid(z)*(1-sigmoid(z))

# Derivative of relu: 0 for z <= 0, 1 for z > 0
# (computed without modifying z in place, so the cached z values stay intact)
def back_relu(z):
    return (z > 0).astype(float)
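A quick sanity check (my own addition, not part of the original post) shows the functions behave as expected on a small array:

z = np.array([[-2.0, 0.0, 3.0]])
print(relu(z))                          # [[0. 0. 3.]]
print(back_relu(z))                     # [[0. 0. 1.]]  (derivative taken as 0 at z = 0)
print(sigmoid(np.array([[0.0]])))       # [[0.5]]
print(back_sigmoid(np.array([[0.0]])))  # [[0.25]]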
- Initializing the parameters
Taking a three-layer network as an example, the parameter dimensions of each layer are: the w of layer l has shape (units_num[l], units_num[l-1]) and the b of layer l has shape (units_num[l], 1).
- A dictionary parameters stores the w and b of every layer; for example, layer 6's w and b are stored under the keys w6 and b6.
- units_num: the number of units in each layer; a network with 3, 2, 2 and 1 units per layer is written as [3,2,2,1].
def init_parameters(units_num):
    # Dictionary holding the parameters of every layer
    parameters = {}
    # Number of layers (including the input layer)
    layers_len = len(units_num)
    for index in range(1,layers_len):
        parameters['w'+str(index)] = np.random.randn(units_num[index],units_num[index-1])/ np.sqrt(units_num[index-1])
        parameters['b'+str(index)] = np.zeros((units_num[index],1))
    return parameters
Dividing by np.sqrt(units_num[index-1]) scales the random weights down with the width of the previous layer, which helps prevent vanishing and exploding gradients.
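To see why this helps, here is a small illustration of my own (not from the original post): with 12288 inputs, an unscaled weight vector produces a pre-activation z whose magnitude is typically tens of times larger than with scaled weights, which would drive a sigmoid unit deep into its flat, near-zero-gradient regions.

# Compare the size of z = w.dot(a) with and without the 1/sqrt(n_prev) scaling
np.random.seed(1)
n_prev = 12288
a = np.random.rand(n_prev, 1)                               # a typical activation vector with values in [0, 1]
w_raw = np.random.randn(1, n_prev)                          # unscaled weights
w_scaled = np.random.randn(1, n_prev) / np.sqrt(n_prev)     # the scaling used in init_parameters
print('|z| without scaling:', float(np.abs(w_raw.dot(a))))     # typically in the tens
print('|z| with scaling:', float(np.abs(w_scaled.dot(a))))     # typically around 1 or less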
- Forward propagation
- The forward pass takes a[l-1] as input, computes z[l] = w[l]·a[l-1] + b[l], and passes z[l] through the activation function to obtain the output a[l].
- A list named cache stores a dictionary of z and a for layer 0 through the output layer: cache[0] holds the input layer's z and a, both equal to x (the input layer has no real z or a, but storing x there keeps the for loop uniform), and cache[1] holds layer 1's z[1] and a[1].
- The parameters dictionary stores each layer's w and b.
- activation is a list of strings; each string is looked up in the activation_fun dictionary to obtain the corresponding function. For a three-layer network whose first and second layers use relu and whose output layer uses sigmoid, activation is ['linear','relu','relu','sigmoid'] (the first entry is a placeholder for the input layer, matching the 1-based layer indexing in the code).
def forward_propagation(x,parameters,activation):
    activation_fun = {'sigmoid':sigmoid,'relu':relu,'linear':linear}
    # List of per-layer cache dictionaries produced by the forward pass
    cache = []
    # Number of layers including the input layer
    # (/ is float division, // is integer division rounding down)
    layers_len = len(parameters)//2 + 1
    # Layer 0's cache is just the input x; a linear "activation" leaves it unchanged
    cache_0 = {'z':x,'a':x}
    cache.append(cache_0)
    for index in range(1,layers_len):
        # The previous layer's output is the current layer's input a[l-1]
        a_pre = cache[index-1]['a']
        # Linear part: z[l] = w[l]·a[l-1] + b[l]
        z = np.dot(parameters['w'+str(index)],a_pre)+parameters['b'+str(index)]
        # Apply the activation function to get a[l]
        a = activation_fun[activation[index]](z)
        # Pack z and a into a dictionary and append it to the cache
        cache_ ={'z':z,'a':a}
        cache.append(cache_)
    return cache
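As a quick check of the wiring (my own addition, not in the original post), running the forward pass on random data with the layer sizes used later prints the expected activation shapes:

np.random.seed(1)
x_demo = np.random.rand(12288, 5)          # 5 fake flattened "images"
params_demo = init_parameters([12288, 20, 7, 5, 1])
cache_demo = forward_propagation(x_demo, params_demo, ['linear','relu','relu','relu','sigmoid'])
for l, c in enumerate(cache_demo):
    print('layer', l, 'a shape:', c['a'].shape)
# expected shapes: (12288, 5), (20, 5), (7, 5), (5, 5), (1, 5)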
- Backward propagation
Backpropagation in a nutshell: da is passed from right to left; from da we compute dz, and from dz we compute dw and db; finally the previous layer's da is computed so that its dz, dw, and db can be computed in turn.
- First compute the output layer's da (the derivative of the cross-entropy cost with respect to a[L]):
da[L] = -(y / a[L] - (1 - y) / (1 - a[L]))
- Then, starting from the last layer L and moving backwards, compute for each layer l:
dz[l] = da[l] * g'(z[l]),  dw[l] = (1/m)·dz[l]·a[l-1].T,  db[l] = (1/m)·sum(dz[l]) over the samples,  da[l-1] = w[l].T·dz[l]
'''
y: the true labels
cache: z and a from the input layer through the output layer
parameters: w and b of layer 1 through the output layer
back_activation: list of strings used to look up the derivative functions in the back_activation_fun dictionary
'''
def back_propagation(y,cache,parameters,back_activation):
    back_activation_fun = {'sigmoid':back_sigmoid,'relu':back_relu}
    # Dictionary holding the gradients
    grads = {}
    # Number of weighted layers (the input layer is not counted)
    layers_len = len(parameters)//2
    # Number of samples
    m = np.shape(y)[1]
    # Predicted output a[L]
    a_L = cache[layers_len]['a']
    # da of the output layer
    da = - (np.divide(y, a_L) - np.divide(1 - y, 1 - a_L))
    # Backpropagate from the last layer L down to layer 1
    for index in range(layers_len,0,-1):
        z = cache[index]['z']
        a = cache[index-1]['a']
        # From da compute dz, then dw and db
        dz = da*back_activation_fun[back_activation[index]](z)
        dw = (1/m)*np.dot(dz,a.T)
        db = (1/m)*np.sum(dz,axis=1,keepdims=True)
        w = parameters['w'+str(index)]
        # da of the previous layer, used in the next iteration
        da = np.dot(w.T,dz)
        grads['dw'+str(index)] = dw
        grads['db'+str(index)] = db
    return grads
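To gain confidence in the gradients, a numerical gradient check can compare the analytic gradient with a finite-difference estimate. The sketch below is my own addition (not from the original post); it uses the cost_function defined in the next subsection and checks a single weight of a small random network, so the two printed values should agree to several decimal places.

# Numerical gradient check on one weight of a small random network
import copy
def grad_check_one_weight(eps=1e-6):
    np.random.seed(2)
    x = np.random.rand(4, 10)                          # 4 features, 10 samples
    y = (np.random.rand(1, 10) > 0.5).astype(float)    # random 0/1 labels
    acts = ['linear', 'relu', 'sigmoid']               # input placeholder, hidden relu, output sigmoid
    params = init_parameters([4, 3, 1])
    # Analytic gradient of the cost with respect to w1[0, 0]
    cache = forward_propagation(x, params, acts)
    analytic = back_propagation(y, cache, params, acts)['dw1'][0, 0]
    # Finite-difference estimate of the same gradient
    p_plus, p_minus = copy.deepcopy(params), copy.deepcopy(params)
    p_plus['w1'][0, 0] += eps
    p_minus['w1'][0, 0] -= eps
    cost_plus = cost_function(forward_propagation(x, p_plus, acts)[-1]['a'], y)
    cost_minus = cost_function(forward_propagation(x, p_minus, acts)[-1]['a'], y)
    numeric = (cost_plus - cost_minus) / (2 * eps)
    print('analytic:', analytic, 'numeric:', numeric)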
- Updating the parameters
'''
grads: dictionary of the gradients dw and db from layer 1 to the output layer
parameters: dictionary of the w and b from layer 1 to the output layer
alpha: the learning rate
'''
def update_parameters(grads,parameters,alpha):
    # Number of layers including the input layer
    layers_len = len(parameters)//2 +1
    for index in range(1,layers_len):
        parameters['w'+str(index)] = parameters['w'+str(index)] - alpha*grads['dw'+str(index)]
        parameters['b'+str(index)] = parameters['b'+str(index)] - alpha*grads['db'+str(index)]
    return parameters
- Prediction, cost function, and accuracy
# Prediction is just a forward pass; entries of y_pred that are >= 0.5 become 1, the rest become 0
def predict(parameters,x,activation):
    layers_len = len(activation)
    cache = forward_propagation(x,parameters,activation)
    y_pred = cache[layers_len-1]['a']
    y_pred[y_pred>=0.5]=1
    y_pred[y_pred<0.5]=0
    return y_pred

def Accuracy(y_pred,y):
    return 100*(1-np.mean(np.abs(y-y_pred)))

def cost_function(a,y):
    # Number of samples; len(y) would be wrong here, it returns the number of rows (1)
    m = np.shape(y)[1]
    cost = -np.sum(y*np.log(a)+(1-y)*np.log(1-a))
    return (1/m)*cost
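One practical caveat (my own note, not from the original post): if any entry of a reaches exactly 0 or 1, np.log returns -inf and the cost becomes infinite. Clipping the activations slightly away from 0 and 1 avoids this:

# A numerically safer variant of the cost (hypothetical helper, not used in the original code)
def cost_function_safe(a, y):
    m = np.shape(y)[1]
    a = np.clip(a, 1e-12, 1 - 1e-12)   # keep the activations strictly inside (0, 1)
    cost = -np.sum(y*np.log(a) + (1-y)*np.log(1-a))
    return (1/m)*cost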
- Putting the model together
Initialize the parameters --> iterate [forward propagation --> backward propagation to get the gradients --> update the parameters]
def NN_Model(x,y,units_num,activation,iterations,alpha):
    # Initialize the parameters
    parameters = init_parameters(units_num)
    costs = []
    m = np.shape(y)[1]
    layers_len = len(units_num)
    for i in range(iterations):
        # Forward pass; cache[0] is the input layer, cache[layers_len-1] is the output layer
        cache = forward_propagation(x,parameters,activation)
        if i%100 == 0:
            # Compute the cost
            cost = cost_function(cache[layers_len-1]['a'],y)
            costs.append(cost)
            print("iteration",i,"cost:",cost)
        grads = back_propagation(y,cache,parameters,activation)
        parameters = update_parameters(grads,parameters,alpha)
    return parameters
- Calling the model
# Load the data
train_x_org,test_x_org,train_x,test_x,train_y,test_y = load_data()
# Normalize the pixel values to [0, 1]
train_x,test_x = train_x/255,test_x/255
print('original training set shape:',train_x_org.shape)
print('training set shape:',train_x.shape)
# Call the model
parameters = NN_Model(train_x,train_y,[12288,20,7,5,1],['linear','relu','relu','relu','sigmoid'],3500,0.1)
- Results
list_classes
train_set_x
train_set_y
original training set shape: (209, 64, 64, 3)
training set shape: (12288, 209)
iteration 0 cost: 0.69569182130787
iteration 100 cost: 0.5325561262879578
iteration 200 cost: 0.4687153528081764
iteration 300 cost: 0.43803808328718974
iteration 400 cost: 0.3943395471502661
iteration 500 cost: 0.3345119833209391
iteration 600 cost: 0.3202016827317832
iteration 700 cost: 0.2811022916205541
iteration 800 cost: 0.2638037584927056
iteration 900 cost: 0.2468532028536798
iteration 1000 cost: 0.2291667206511114
iteration 1100 cost: 0.21796051510557868
iteration 1200 cost: 0.20535045849120123
iteration 1300 cost: 0.1936631369771659
iteration 1400 cost: 0.18195387546243838
iteration 1500 cost: 0.1749647917240423
iteration 1600 cost: 0.16523048502137094
iteration 1700 cost: 0.1585112448076189
iteration 1800 cost: 0.15146900170566005
iteration 1900 cost: 0.14651661611917827
iteration 2000 cost: 0.140591160250609
iteration 2100 cost: 0.13463273296920547
iteration 2200 cost: 0.12971410105394196
iteration 2300 cost: 0.12521383803165206
iteration 2400 cost: 0.12104516279187778
iteration 2500 cost: 0.11709692701758376
iteration 2600 cost: 0.11345603409162
iteration 2700 cost: 0.11013416562980305
iteration 2800 cost: 0.10687306482838707
iteration 2900 cost: 0.1035916649323184
iteration 3000 cost: 0.10078055362700247
iteration 3100 cost: 0.0981029375505057
iteration 3200 cost: 0.09552276363010712
iteration 3300 cost: 0.09297137379600809
iteration 3400 cost: 0.09093232206085816
- Test set
y_pred_test = predict(parameters,test_x,['linear','relu','relu','relu','sigmoid'])
y_pred_train = predict(parameters,train_x,['linear','relu','relu','relu','sigmoid'])
# Accuracy
print('training accuracy:',Accuracy(train_y,y_pred_train),"%")
print('test accuracy:',Accuracy(test_y,y_pred_test),"%")
The test accuracy is 76%, noticeably higher than what plain logistic regression achieves.
- Predicting a new image
new_pic = plt.imread('zoro.png')
from skimage import transform
# resize returns a float image scaled to [0, 1], so no further division by 255 is needed
new_pic = transform.resize(new_pic,(64,64))
plt.imshow(new_pic)
# keep only the RGB channels in case the PNG carries an alpha channel
x = new_pic[:,:,:3].reshape(64*64*3,1)
predict(parameters,x,['linear','relu','relu','relu','sigmoid'])
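To turn the 0/1 output into a readable label, the class names stored under the list_classes key can be looked up. This is my own addition; it assumes the standard catvnoncat files, in which list_classes holds [b'non-cat', b'cat'].

# Map the 0/1 prediction back to the class names stored in the h5 file
with h5py.File('train_catvnoncat.h5', 'r') as f:
    classes = [c.decode('utf-8') for c in f['list_classes'][:]]
pred = predict(parameters, x, ['linear','relu','relu','relu','sigmoid'])
print('prediction:', classes[int(pred[0, 0])])   # 'cat' or 'non-cat'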
Complete code
import h5py
import numpy as np
import matplotlib.pyplot as plt

# Load and preprocess the data
def load_data():
    # Open the raw training and test files (read-only)
    train_data_org = h5py.File('train_catvnoncat.h5', 'r')
    test_data_org = h5py.File('test_catvnoncat.h5', 'r')
    # An h5 file behaves like a dictionary: values are accessed by key
    for key in train_data_org.keys():
        print(key)
    '''
    Output:
    list_classes
    train_set_x
    train_set_y
    '''
    # Training data
    train_x_org = train_data_org["train_set_x"]
    train_y_org = train_data_org["train_set_y"]
    # Test data
    test_x_org = test_data_org["test_set_x"]
    test_y_org = test_data_org["test_set_y"]
    # Dimensions and sample counts
    # number of features per image (64*64*3 = 12288)
    n = np.shape(train_x_org)[1]*np.shape(train_x_org)[2]*np.shape(train_x_org)[3]
    # number of training samples
    m_train = np.shape(train_x_org)[0]
    # number of test samples
    m_test = np.shape(test_x_org)[0]
    # Reshape the labels into row vectors of shape (1, m)
    train_y = np.array(train_y_org).reshape(1,m_train)
    test_y = np.array(test_y_org).reshape(1,m_test)
    # The images are stored as (m, 64, 64, 3); flatten them into a (12288, m) array
    train_x = np.array(train_x_org).reshape(m_train,-1).T   # shape (n, m_train)
    test_x = np.array(test_x_org).reshape(m_test, -1).T     # shape (n, m_test)
    return train_x_org,test_x_org,train_x,test_x,train_y,test_y
def sigmoid(z):
    return 1/(1+np.exp(-z))

def linear(z):
    return z

def relu(z):
    return np.maximum(0,z)

# Derivative of sigmoid
def back_sigmoid(z):
    return sigmoid(z)*(1-sigmoid(z))

# Derivative of relu: 0 for z <= 0, 1 for z > 0 (computed without modifying z in place)
def back_relu(z):
    return (z > 0).astype(float)
def init_parameters(units_num):
    # Dictionary holding the parameters of every layer
    parameters = {}
    # Number of layers (including the input layer)
    layers_len = len(units_num)
    for index in range(1,layers_len):
        parameters['w'+str(index)] = np.random.randn(units_num[index],units_num[index-1])/ np.sqrt(units_num[index-1])
        parameters['b'+str(index)] = np.zeros((units_num[index],1))
    return parameters
def forward_propagation(x,parameters,activation):
    activation_fun = {'sigmoid':sigmoid,'relu':relu,'linear':linear}
    # List of per-layer cache dictionaries produced by the forward pass
    cache = []
    # Number of layers including the input layer
    # (/ is float division, // is integer division rounding down)
    layers_len = len(parameters)//2 + 1
    # Layer 0's cache is just the input x; a linear "activation" leaves it unchanged
    cache_0 = {'z':x,'a':x}
    cache.append(cache_0)
    for index in range(1,layers_len):
        # The previous layer's output is the current layer's input a[l-1]
        a_pre = cache[index-1]['a']
        # Linear part: z[l] = w[l]·a[l-1] + b[l]
        z = np.dot(parameters['w'+str(index)],a_pre)+parameters['b'+str(index)]
        # Apply the activation function to get a[l]
        a = activation_fun[activation[index]](z)
        cache_ ={'z':z,'a':a}
        cache.append(cache_)
    return cache
def cost_function(a,y):
    # Number of samples; len(y) would be wrong here, it returns the number of rows (1)
    m = np.shape(y)[1]
    cost = -np.sum(y*np.log(a)+(1-y)*np.log(1-a))
    return (1/m)*cost
'''
cache and da of layer l
pre_a: the input coming from layer l-1
m: the number of samples
'''
def back_propagation(y,cache,parameters,back_activation):
    back_activation_fun = {'sigmoid':back_sigmoid,'relu':back_relu}
    # Dictionary holding the gradients
    grads = {}
    # Number of weighted layers (the input layer is not counted)
    layers_len = len(parameters)//2
    # Predicted output a[L]
    a_L = cache[layers_len]['a']
    m = np.shape(y)[1]
    da = - (np.divide(y, a_L) - np.divide(1 - y, 1 - a_L))
    # Backpropagate from the last layer L down to layer 1
    for index in range(layers_len,0,-1):
        z = cache[index]['z']
        a = cache[index-1]['a']
        #m = np.shape(cache[index-1]['a'])[1]
        # From da compute dz, then dw and db
        dz = da*back_activation_fun[back_activation[index]](z)
        dw = (1/m)*np.dot(dz,a.T)
        db = (1/m)*np.sum(dz,axis=1,keepdims=True)
        w = parameters['w'+str(index)]
        # da of the previous layer, used in the next iteration
        da = np.dot(w.T,dz)
        grads['dw'+str(index)] = dw
        grads['db'+str(index)] = db
    return grads
def update_parameters(grads,parameters,alpha):
    # Number of layers including the input layer
    layers_len = len(parameters)//2 +1
    for index in range(1,layers_len):
        parameters['w'+str(index)] = parameters['w'+str(index)] - alpha*grads['dw'+str(index)]
        parameters['b'+str(index)] = parameters['b'+str(index)] - alpha*grads['db'+str(index)]
    return parameters
# Prediction is just a forward pass; outputs >= 0.5 are classified as 1, the rest as 0
def predict(parameters,x,activation):
    layers_len = len(activation)
    cache = forward_propagation(x,parameters,activation)
    y_pred = cache[layers_len-1]['a']
    y_pred[y_pred>=0.5]=1
    y_pred[y_pred<0.5]=0
    return y_pred

def Accuracy(y_pred,y):
    return 100*(1-np.mean(np.abs(y-y_pred)))
def NN_Model(x,y,units_num,activation,iterations,alpha):
    # Initialize the parameters
    parameters = init_parameters(units_num)
    costs = []
    m = np.shape(y)[1]
    layers_len = len(units_num)
    for i in range(iterations):
        # Forward pass; cache[0] is the input layer, cache[layers_len-1] is the output layer
        cache = forward_propagation(x,parameters,activation)
        if i%100 == 0:
            # Compute the cost
            cost = cost_function(cache[layers_len-1]['a'],y)
            costs.append(cost)
            print("iteration",i,"cost:",cost)
        grads = back_propagation(y,cache,parameters,activation)
        parameters = update_parameters(grads,parameters,alpha)
    # Plot the cost curve recorded every 100 iterations
    plt.plot(costs)
    return parameters
train_x_org,test_x_org,train_x,test_x,train_y,test_y = load_data()
# Normalize the pixel values to [0, 1]
train_x,test_x = train_x/255,test_x/255
print('original training set shape:',train_x_org.shape)
print('training set shape:',train_x.shape)
parameters = NN_Model(train_x,train_y,[12288,20,7,5,1],['linear','relu','relu','relu','sigmoid'],3500,0.1)
y_pred_test = predict(parameters,test_x,['linear','relu','relu','relu','sigmoid'])
y_pred_train = predict(parameters,train_x,['linear','relu','relu','relu','sigmoid'])
# Accuracy
print('training accuracy:',Accuracy(train_y,y_pred_train),"%")
print('test accuracy:',Accuracy(test_y,y_pred_test),"%")
Notes
- To avoid exploding and vanishing gradients, the weight initialization needs special handling (here, the 1/sqrt(n_prev) scaling).