1. 导入相关库
import numpy as np
import matplotlib. pyplot as plt
import h5py
import skimage
import cv2
2. 加载数据集
def load_dataset():
    """Load the cat / non-cat training and test sets from the ``datasets`` folder.

    Reads ``datasets/train_catvnoncat.h5`` and ``datasets/test_catvnoncat.h5``.

    Returns:
        A 5-tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. Both label arrays are reshaped into
        ``(1, m)`` row vectors; ``classes`` is the ``list_classes`` dataset
        stored in the test file.
    """
    # Context managers close the HDF5 handles as soon as the data has been
    # copied into memory; previously both files were left open.
    with h5py.File('datasets/train_catvnoncat.h5', 'r') as train_dataset:
        train_set_x_orig = np.array(train_dataset['train_set_x'][:])
        train_set_y_orig = np.array(train_dataset['train_set_y'][:])

    with h5py.File('datasets/test_catvnoncat.h5', 'r') as test_data:
        test_set_x_orig = np.array(test_data['test_set_x'][:])
        test_set_y_orig = np.array(test_data['test_set_y'][:])
        classes = np.array(test_data['list_classes'][:])

    # Turn the 1-D label vectors into (1, m) row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
3. 工具函数–sigmoid
def sigmoid_activate(z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: A scalar or an array (anything ``np.asarray`` accepts).

    Returns:
        The sigmoid of ``z``, with values between 0 and 1. A scalar input
        yields a NumPy scalar, an array input yields an array of the same
        shape.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    # exp(-|z|) never exceeds 1, so it cannot overflow the way exp(-z) does
    # for large negative z. The two branches are algebraically the same
    # function: 1/(1+e^-z) for z >= 0 and e^z/(1+e^z) for z < 0.
    e = np.exp(-np.abs(z))
    a = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return a[()] if a.ndim == 0 else a
4. 工具函数–权重初始化0
def initialize_with_zeros(dim):
    """Build the zero-valued starting parameters for the model.

    Args:
        dim: Number of weights to create, one per input feature.

    Returns:
        A tuple ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and
        ``b`` is the bias, the integer ``0``.
    """
    bias = 0
    weights = np.zeros(shape=(dim, 1))
    return weights, bias
5. 工具函数–前向传播&反向传播
def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Args:
        w: Weight array of shape ``(n_features, 1)``.
        b: Bias.
        X: Feature matrix of shape ``(n_features, m)``, one column per sample.
        Y: Labels (0 or 1) of shape ``(1, m)``.

    Returns:
        A tuple ``(grads, cost)``. ``grads`` is ``{"dw": ..., "db": ...}``
        holding the gradients of the cost with respect to ``w`` and ``b``;
        ``cost`` is the mean cross-entropy over the ``m`` samples.
    """
    m = X.shape[1]

    # Forward pass.
    Z = np.dot(w.T, X) + b
    A = sigmoid_activate(Z)

    # Cross-entropy written in terms of the logits:
    #   -(y*log(a) + (1-y)*log(1-a)) == log(1 + e^z) - y*z
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow, so the cost
    # stays finite even when the sigmoid saturates to exactly 0 or 1
    # (where log(A) / log(1 - A) would yield inf or nan).
    cost = np.sum(np.logaddexp(0, Z) - Y * Z) / m

    # Backward pass.
    dZ = A - Y
    dw = np.dot(X, dZ.T) / m
    db = np.sum(dZ) / m

    grads = {"dw": dw, "db": db}
    return grads, cost
6. 工具函数–优化w和b参数
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Args:
        w: Initial weight array, shape ``(n_features, 1)``.
        b: Initial bias.
        X: Feature matrix, shape ``(n_features, m)``.
        Y: Labels (0 or 1), shape ``(1, m)``.
        num_iterations: Number of gradient-descent steps to run.
        learning_rate: Step size applied to each gradient.
        print_cost: When true, print the cost every 100 steps.

    Returns:
        A tuple ``(params, costs)``. ``params`` is ``{"w": w, "b": b}`` with
        the updated parameters; ``costs`` lists the cost recorded at every
        100th step (steps 0, 100, 200, ...).
    """
    cost_history = []
    for step in range(num_iterations):
        gradients, current_cost = propagate(w, b, X, Y)

        # Move against the gradient; the caller's arrays are not modified.
        w = w - learning_rate * gradients['dw']
        b = b - learning_rate * gradients['db']

        # Only every 100th step is recorded (and optionally printed).
        if step % 100 != 0:
            continue
        cost_history.append(current_cost)
        if print_cost:
            print("优化%d次后的成本是:" % step, current_cost)

    return {"w": w, "b": b}, cost_history
7. 工具函数–预测
def predict(w, b, X):
    """Predict a 0/1 label for each sample in ``X`` using parameters ``w``, ``b``.

    Args:
        w: Weight array of shape ``(n_features, 1)``.
        b: Bias.
        X: Feature matrix of shape ``(n_features, m)``; ``m`` may be 1 or more.

    Returns:
        A float array of shape ``(1, m)`` holding 1 where the sigmoid
        activation is at least 0.5 and 0 elsewhere.
    """
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    A = sigmoid_activate(np.dot(w.T, X) + b)
    # Boolean-mask assignment thresholds every sample at once instead of
    # looping over the columns in Python.
    Y_prediction[0, A[0] >= 0.5] = 1
    return Y_prediction
8. 构建神经网络模型
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """Train the single-neuron model and report its accuracy.

    Args:
        X_train: Training features, shape ``(n_features, m_train)``.
        Y_train: Training labels, shape ``(1, m_train)``.
        X_test: Test features, shape ``(n_features, m_test)``.
        Y_test: Test labels, shape ``(1, m_test)``.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size for gradient descent.
        print_cost: When true, the cost is printed every 100 steps.

    Returns:
        A dict with the keys ``costs``, ``Y_prediction_test``,
        ``Y_prediction_train``, ``w``, ``b``, ``learning_rate`` and
        ``num_iterations``.
    """
    # Start from all-zero parameters, one weight per feature row.
    initial_w, initial_b = initialize_with_zeros(X_train.shape[0])
    fitted, costs = optimize(initial_w, initial_b, X_train, Y_train,
                             num_iterations, learning_rate, print_cost)
    w, b = fitted['w'], fitted['b']

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    # Accuracy in percent = 100 - mean absolute error * 100 (labels are 0/1).
    train_accuracy = 100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100
    test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100
    print("对训练图片的预测准确率为: {}%".format(train_accuracy))
    print("对测试图片的预测准确率为: {}%".format(test_accuracy))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
9. 编写主函数
def main():
    """Run the cat / non-cat logistic-regression demo end to end.

    Loads the dataset, shows a sample image, flattens and scales the pixel
    data, trains the model, plots the cost curve, compares three learning
    rates and finally classifies ``images/my_image1.jpg``.
    """
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    # Show one training example together with its label.
    index = 30
    plt.imshow(train_set_x_orig[index])
    plt.show()
    print("标签为" + str(train_set_y[:, index]) + ", 这是一个'" + classes[np.squeeze(train_set_y[:, index])].decode(
        "utf-8") + "' 图片.")

    print('train_set_x_orig.shape:', train_set_x_orig.shape)
    print('train_set_y.shape:', train_set_y.shape)
    print("test_set_x_orig.shape:", test_set_x_orig.shape)
    print("test_set_y.shape:", test_set_y.shape)

    m_train = train_set_x_orig.shape[0]
    m_test = test_set_x_orig.shape[0]
    num_px = test_set_x_orig.shape[1]
    print("训练样本数: m_train = " + str(m_train))
    print("测试样本数: m_test = " + str(m_test))
    print("每张图片的宽/高: num_px = " + str(num_px))

    # Flatten every image into one column: (m, ...) -> (features, m).
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    print("train_set_x_flatten.shape:", train_set_x_flatten.shape)
    print("test_set_x_flatten.shape:", test_set_x_flatten.shape)

    # Scale the pixel values by 1/255.
    train_set_x = train_set_x_flatten / 255.
    test_set_x = test_set_x_flatten / 255.

    # The data is now loaded and pre-processed; train the single-neuron
    # model on it.
    d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations=10000, learning_rate=0.005,
              print_cost=True)

    # The original author reports that the cost keeps falling during
    # training, with about 99% training accuracy and about 70% test accuracy.

    # Show one test image with its true label and the prediction.
    index = 8
    plt.imshow(test_set_x[:, index].reshape((num_px, num_px, 3)))
    # Show the image in its own figure; without this the cost curve below
    # would be drawn onto the same axes as the image.
    plt.show()
    print("这张图的标签是 " + str(test_set_y[0, index]) + ", 预测结果是 " + str(int(d["Y_prediction_test"][0, index])))

    # Cost curve of the main training run.
    costs = np.squeeze(d['costs'])
    plt.plot(costs)
    plt.xlabel("iterations (per hundreds)")
    plt.ylabel("cost")
    plt.title("learning rate = " + str(d['learning_rate']))
    plt.show()

    # Compare the cost curves obtained with several learning rates.
    learning_rates = [0.01, 0.001, 0.0001]
    models = {}
    for rate in learning_rates:
        print("学习率为: " + str(rate) + "时")
        models[str(rate)] = model(train_set_x, train_set_y, test_set_x, test_set_y,
                                  num_iterations=1500, learning_rate=rate, print_cost=False)
        print('\n' + "-------------------------------------------------------" + '\n')
    for rate in learning_rates:
        plt.plot(np.squeeze(models[str(rate)]["costs"]), label=str(models[str(rate)]["learning_rate"]))
    plt.ylabel('cost')
    plt.xlabel('iterations (hundreds)')
    legend = plt.legend(loc='upper center', shadow=True)
    frame = legend.get_frame()
    frame.set_facecolor('0.90')
    plt.show()

    # Classify a user-supplied picture.
    image_name = "my_image1.jpg"
    fname = "images/" + image_name
    image = np.array(plt.imread(fname))
    # The model was trained on pixels scaled by 1/255, so the custom image
    # must be scaled the same way before prediction (assumes the JPEG loads
    # as 0-255 integer data).
    my_image = cv2.resize(image, (num_px, num_px)).reshape((1, num_px * num_px * 3)).T / 255.
    my_predicted_image = predict(d["w"], d["b"], my_image)
    plt.imshow(image)
    plt.show()  # without this the picture is never displayed
    print("预测结果为 " + str(int(np.squeeze(my_predicted_image))))
10. 运行主函数
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
11. 完整代码
import numpy as np
import matplotlib. pyplot as plt
import h5py
import skimage
import tensorflow as tf
import cv2
def load_dataset():
    """Load the cat / non-cat training and test sets from the ``datasets`` folder.

    Reads ``datasets/train_catvnoncat.h5`` and ``datasets/test_catvnoncat.h5``.

    Returns:
        A 5-tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. Both label arrays are reshaped into
        ``(1, m)`` row vectors; ``classes`` is the ``list_classes`` dataset
        stored in the test file.
    """
    # Context managers close the HDF5 handles as soon as the data has been
    # copied into memory; previously both files were left open.
    with h5py.File('datasets/train_catvnoncat.h5', 'r') as train_dataset:
        train_set_x_orig = np.array(train_dataset['train_set_x'][:])
        train_set_y_orig = np.array(train_dataset['train_set_y'][:])

    with h5py.File('datasets/test_catvnoncat.h5', 'r') as test_data:
        test_set_x_orig = np.array(test_data['test_set_x'][:])
        test_set_y_orig = np.array(test_data['test_set_y'][:])
        classes = np.array(test_data['list_classes'][:])

    # Turn the 1-D label vectors into (1, m) row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def sigmoid_activate(z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: A scalar or an array (anything ``np.asarray`` accepts).

    Returns:
        The sigmoid of ``z``, with values between 0 and 1. A scalar input
        yields a NumPy scalar, an array input yields an array of the same
        shape.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    # exp(-|z|) never exceeds 1, so it cannot overflow the way exp(-z) does
    # for large negative z. The two branches are algebraically the same
    # function: 1/(1+e^-z) for z >= 0 and e^z/(1+e^z) for z < 0.
    e = np.exp(-np.abs(z))
    a = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return a[()] if a.ndim == 0 else a
def initialize_with_zeros(dim):
    """Build the zero-valued starting parameters for the model.

    Args:
        dim: Number of weights to create, one per input feature.

    Returns:
        A tuple ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and
        ``b`` is the bias, the integer ``0``.
    """
    bias = 0
    weights = np.zeros(shape=(dim, 1))
    return weights, bias
def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Args:
        w: Weight array of shape ``(n_features, 1)``.
        b: Bias.
        X: Feature matrix of shape ``(n_features, m)``, one column per sample.
        Y: Labels (0 or 1) of shape ``(1, m)``.

    Returns:
        A tuple ``(grads, cost)``. ``grads`` is ``{"dw": ..., "db": ...}``
        holding the gradients of the cost with respect to ``w`` and ``b``;
        ``cost`` is the mean cross-entropy over the ``m`` samples.
    """
    m = X.shape[1]

    # Forward pass.
    Z = np.dot(w.T, X) + b
    A = sigmoid_activate(Z)

    # Cross-entropy written in terms of the logits:
    #   -(y*log(a) + (1-y)*log(1-a)) == log(1 + e^z) - y*z
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow, so the cost
    # stays finite even when the sigmoid saturates to exactly 0 or 1
    # (where log(A) / log(1 - A) would yield inf or nan).
    cost = np.sum(np.logaddexp(0, Z) - Y * Z) / m

    # Backward pass.
    dZ = A - Y
    dw = np.dot(X, dZ.T) / m
    db = np.sum(dZ) / m

    grads = {"dw": dw, "db": db}
    return grads, cost
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Args:
        w: Initial weight array, shape ``(n_features, 1)``.
        b: Initial bias.
        X: Feature matrix, shape ``(n_features, m)``.
        Y: Labels (0 or 1), shape ``(1, m)``.
        num_iterations: Number of gradient-descent steps to run.
        learning_rate: Step size applied to each gradient.
        print_cost: When true, print the cost every 100 steps.

    Returns:
        A tuple ``(params, costs)``. ``params`` is ``{"w": w, "b": b}`` with
        the updated parameters; ``costs`` lists the cost recorded at every
        100th step (steps 0, 100, 200, ...).
    """
    cost_history = []
    for step in range(num_iterations):
        gradients, current_cost = propagate(w, b, X, Y)

        # Move against the gradient; the caller's arrays are not modified.
        w = w - learning_rate * gradients['dw']
        b = b - learning_rate * gradients['db']

        # Only every 100th step is recorded (and optionally printed).
        if step % 100 != 0:
            continue
        cost_history.append(current_cost)
        if print_cost:
            print("优化%d次后的成本是:" % step, current_cost)

    return {"w": w, "b": b}, cost_history
def predict(w, b, X):
    """Predict a 0/1 label for each sample in ``X`` using parameters ``w``, ``b``.

    Args:
        w: Weight array of shape ``(n_features, 1)``.
        b: Bias.
        X: Feature matrix of shape ``(n_features, m)``; ``m`` may be 1 or more.

    Returns:
        A float array of shape ``(1, m)`` holding 1 where the sigmoid
        activation is at least 0.5 and 0 elsewhere.
    """
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    A = sigmoid_activate(np.dot(w.T, X) + b)
    # Boolean-mask assignment thresholds every sample at once instead of
    # looping over the columns in Python.
    Y_prediction[0, A[0] >= 0.5] = 1
    return Y_prediction
"""到此,我们已经编写了所需的所有工具函数了。下面我们将这些函数组合起来,构建出一个最终的神经网络模型函数。"""
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """Train the single-neuron model and report its accuracy.

    Args:
        X_train: Training features, shape ``(n_features, m_train)``.
        Y_train: Training labels, shape ``(1, m_train)``.
        X_test: Test features, shape ``(n_features, m_test)``.
        Y_test: Test labels, shape ``(1, m_test)``.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size for gradient descent.
        print_cost: When true, the cost is printed every 100 steps.

    Returns:
        A dict with the keys ``costs``, ``Y_prediction_test``,
        ``Y_prediction_train``, ``w``, ``b``, ``learning_rate`` and
        ``num_iterations``.
    """
    # Start from all-zero parameters, one weight per feature row.
    initial_w, initial_b = initialize_with_zeros(X_train.shape[0])
    fitted, costs = optimize(initial_w, initial_b, X_train, Y_train,
                             num_iterations, learning_rate, print_cost)
    w, b = fitted['w'], fitted['b']

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    # Accuracy in percent = 100 - mean absolute error * 100 (labels are 0/1).
    train_accuracy = 100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100
    test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100
    print("对训练图片的预测准确率为: {}%".format(train_accuracy))
    print("对测试图片的预测准确率为: {}%".format(test_accuracy))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
def main():
    """Run the cat / non-cat logistic-regression demo end to end.

    Loads the dataset, shows a sample image, flattens and scales the pixel
    data, trains the model, plots the cost curve, compares three learning
    rates and finally classifies ``images/my_image1.jpg``.
    """
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    # Show one training example together with its label.
    index = 30
    plt.imshow(train_set_x_orig[index])
    plt.show()
    print("标签为" + str(train_set_y[:, index]) + ", 这是一个'" + classes[np.squeeze(train_set_y[:, index])].decode(
        "utf-8") + "' 图片.")

    print('train_set_x_orig.shape:', train_set_x_orig.shape)
    print('train_set_y.shape:', train_set_y.shape)
    print("test_set_x_orig.shape:", test_set_x_orig.shape)
    print("test_set_y.shape:", test_set_y.shape)

    m_train = train_set_x_orig.shape[0]
    m_test = test_set_x_orig.shape[0]
    num_px = test_set_x_orig.shape[1]
    print("训练样本数: m_train = " + str(m_train))
    print("测试样本数: m_test = " + str(m_test))
    print("每张图片的宽/高: num_px = " + str(num_px))

    # Flatten every image into one column: (m, ...) -> (features, m).
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    print("train_set_x_flatten.shape:", train_set_x_flatten.shape)
    print("test_set_x_flatten.shape:", test_set_x_flatten.shape)

    # Scale the pixel values by 1/255.
    train_set_x = train_set_x_flatten / 255.
    test_set_x = test_set_x_flatten / 255.

    # The data is now loaded and pre-processed; train the single-neuron
    # model on it.
    d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations=10000, learning_rate=0.005,
              print_cost=True)

    # The original author reports that the cost keeps falling during
    # training, with about 99% training accuracy and about 70% test accuracy.

    # Show one test image with its true label and the prediction.
    index = 8
    plt.imshow(test_set_x[:, index].reshape((num_px, num_px, 3)))
    # Show the image in its own figure; without this the cost curve below
    # would be drawn onto the same axes as the image.
    plt.show()
    print("这张图的标签是 " + str(test_set_y[0, index]) + ", 预测结果是 " + str(int(d["Y_prediction_test"][0, index])))

    # Cost curve of the main training run.
    costs = np.squeeze(d['costs'])
    plt.plot(costs)
    plt.xlabel("iterations (per hundreds)")
    plt.ylabel("cost")
    plt.title("learning rate = " + str(d['learning_rate']))
    plt.show()

    # Compare the cost curves obtained with several learning rates.
    learning_rates = [0.01, 0.001, 0.0001]
    models = {}
    for rate in learning_rates:
        print("学习率为: " + str(rate) + "时")
        models[str(rate)] = model(train_set_x, train_set_y, test_set_x, test_set_y,
                                  num_iterations=1500, learning_rate=rate, print_cost=False)
        print('\n' + "-------------------------------------------------------" + '\n')
    for rate in learning_rates:
        plt.plot(np.squeeze(models[str(rate)]["costs"]), label=str(models[str(rate)]["learning_rate"]))
    plt.ylabel('cost')
    plt.xlabel('iterations (hundreds)')
    legend = plt.legend(loc='upper center', shadow=True)
    frame = legend.get_frame()
    frame.set_facecolor('0.90')
    plt.show()

    # Classify a user-supplied picture.
    image_name = "my_image1.jpg"
    fname = "images/" + image_name
    image = np.array(plt.imread(fname))
    # Resize once and reuse the result for both prediction and display.
    resized = cv2.resize(image, (num_px, num_px))
    # The model was trained on pixels scaled by 1/255, so the custom image
    # must be scaled the same way before prediction (assumes the JPEG loads
    # as 0-255 integer data).
    my_image = resized.reshape((1, num_px * num_px * 3)).T / 255.
    my_predicted_image = predict(d["w"], d["b"], my_image)
    plt.imshow(resized)
    plt.show()
    print("预测结果为 " + str(int(np.squeeze(my_predicted_image))))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()