###### AI and Deep Learning by Example: A DNN (Deep Neural Network)

import numpy as np
import h5py

# ------------------------ DNN framework ------------------------

# sigmoid forward activation
def sigmoid(Z):

    # Z -- input from the linear unit
    # cache -- stores Z for the backward pass

    A = 1.0 / (1 + np.exp(-Z))
    cache = Z

    return A, cache

# ReLU forward activation
def ReLU(Z):
    A = np.maximum(0, Z)

    assert (A.shape == Z.shape)
    cache = Z

    return A, cache

# sigmoid backward activation
def sigmoid_backward(dA, cache):

    # dZ[l] = dA[l] * g[l]'(Z[l])

    Z = cache

    f = 1.0 / (1 + np.exp(-Z))

    # sigmoid'(Z) = sigmoid(Z) * (1 - sigmoid(Z))
    dZ = dA * f * (1 - f)

    assert (dZ.shape == Z.shape)

    return dZ

# ReLU backward activation
def ReLU_backward(dA, cache):
    Z = cache

    # ReLU'(Z) is 1 where Z > 0 and 0 elsewhere
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0

    assert (dZ.shape == Z.shape)

    return dZ
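# A quick numerical sanity check (a sketch added here, not part of the original
# post): compare sigmoid_backward against a central-difference derivative to
# confirm the forward and backward activations are consistent.
'''
Z = np.array([[0.5, -1.0, 2.0]])
eps = 1e-7
A_plus, _ = sigmoid(Z + eps)
A_minus, _ = sigmoid(Z - eps)
numeric = (A_plus - A_minus) / (2 * eps)         # numerical d(sigmoid)/dZ
analytic = sigmoid_backward(np.ones_like(Z), Z)  # analytic, with dA = 1
print(np.allclose(numeric, analytic))            # expected: True
'''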

# ------------------------ Forward propagation ------------------------

# Initialize the parameters of every hidden and output layer
def initialize_parameters(layer):
    # layer -- list holding the number of units in each layer

    L = len(layer)
    parameters = {}

    # Shape of W at layer i:  rows -- units in this layer   columns -- units in the previous layer
    # Shape of b at layer i:  rows -- units in this layer   columns -- always 1
    for i in range(1, L):
        # scale by 1/sqrt(fan-in) to keep the early activations well-behaved
        parameters["W" + str(i)] = np.random.randn(layer[i], layer[i-1]) / np.sqrt(layer[i-1])
        parameters["b" + str(i)] = np.zeros((layer[i], 1))

        assert (parameters["W" + str(i)].shape == (layer[i], layer[i-1]))
        assert (parameters["b" + str(i)].shape == (layer[i], 1))

    return parameters

# Test code
'''
initialize_parameters([3,4,1])
'''

# Forward propagation: linear step
def linear_forward(A_prev, W, b):

    # A_prev -- activations passed in from the previous layer
    # W, b -- parameters of the current layer

    # Z = W·A_prev + b
    Z = np.dot(W, A_prev) + b
    assert (Z.shape == (W.shape[0], A_prev.shape[1]))

    # cache A_prev, W, b for the backward pass
    cache = (A_prev, W, b)

    return Z, cache

# Test code
'''
A_prev = np.array([[1], [2], [3]])
parameters = initialize_parameters([3, 4, 1])
Z, cache = linear_forward(A_prev, parameters["W1"], parameters["b1"])
'''

# Forward propagation: linear -> activation
def linear_forward_activation(A_prev, W, b, activation_function):

    # linear_cache -- stores A_prev, W, b
    # activation_cache -- stores Z

    if activation_function == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation_function == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = ReLU(Z)

    # Shape of A at this layer:  rows -- rows of W   columns -- columns of A_prev
    assert (A.shape == (W.shape[0], A_prev.shape[1]))

    cache = (linear_cache, activation_cache)

    return A, cache

# Forward propagation through the whole network
def deep_forward(X, parameters):

    # caches -- stores (A_prev, W, b) and Z for every hidden and output layer
    # AL -- output activation of the final layer

    caches = []
    A = X
    # parameters holds one W and one b per layer
    L = len(parameters) // 2

    # Hidden layers: ReLU activation
    for i in range(1, L):
        A_prev = A
        A, cache = linear_forward_activation(A_prev, parameters["W" + str(i)], parameters["b" + str(i)], "relu")
        caches.append(cache)

    # Output layer: sigmoid activation
    AL, cache = linear_forward_activation(A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid")
    caches.append(cache)

    assert (AL.shape == (1, X.shape[1]))

    return AL, caches

# Cost function (binary cross-entropy)
def compute_cost(AL, Y):

    # Y -- true labels
    # AL -- predictions Y(hat) from the forward pass

    m = Y.shape[1]
    cost = (1. / m) * (-np.dot(Y, np.log(AL).T) - np.dot(1 - Y, np.log(1 - AL).T))
    cost = np.squeeze(cost)
    assert (cost.shape == ())

    return cost

# Test code
'''
Y = np.asarray([[1, 1, 1]])
AL = np.asarray([[.8, .9, 0.4]])
print(compute_cost(AL, Y))

X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
AL, caches = deep_forward(X, parameters)
Y = np.array([[1]])
print(compute_cost(AL, Y))
'''
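For reference (standard cross-entropy algebra, not spelled out in the original post), the cost computed above is

$$J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log a^{[L](i)} + \big(1-y^{(i)}\big)\log\big(1-a^{[L](i)}\big)\Big]$$

and its per-example derivative with respect to the output activation,

$$\frac{\partial \mathcal{L}}{\partial a^{[L]}} = -\Big(\frac{y}{a^{[L]}} - \frac{1-y}{1-a^{[L]}}\Big),$$

is exactly the dAL expression that seeds deep_backward below (the 1/m averaging is applied later, inside linear_backward).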

# ------------------------ Backward propagation ------------------------

# Backward propagation: linear step
def linear_backward(dZ, cache):

    A_prev, W, b = cache

    # Underlying formulas
    '''
    dW[l] = dZ[l] · A_prev.T / m
    db[l] = sum(dZ[l], over examples) / m
    dA[l-1] = W[l].T · dZ[l]
    '''

    # m -- number of examples
    m = A_prev.shape[1]

    # dW -- gradient of the cost w.r.t. this layer's W
    # db -- gradient of the cost w.r.t. this layer's b
    # dA_prev -- gradient of the cost w.r.t. layer l-1's A

    dW = np.dot(dZ, A_prev.T) / m

    # keepdims preserves the 2-D shape of the result
    db = np.sum(dZ, axis=1, keepdims=True) / m

    dA_prev = np.dot(W.T, dZ)

    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    assert (dA_prev.shape == A_prev.shape)

    return dA_prev, dW, db

# Test code
# Current layer: 3 units; previous layer: 1 unit
'''
np.random.seed(3)
dZ = np.random.randn(3,1)
A_prev = np.random.randn(1,1)
W = np.random.randn(3,1)
b = np.random.randn(3,1)
linear_cache = (A_prev, W, b)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
'''

# Backward propagation: linear -> activation
def linear_activation_backward(dA, cache, activation):

    # linear_cache -- stores A_prev, W, b

    linear_cache, activation_cache = cache

    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "relu":
        dZ = ReLU_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

# Test code
'''
np.random.seed(2)
dA = np.random.randn(1,2)
A = np.random.randn(3,2)
W = np.random.randn(1,3)
b = np.random.randn(1,1)
Z = np.random.randn(1,2)
linear_cache = (A,W,b)
activation_cache = Z
cache = (linear_cache,activation_cache)
dA_prev,dW,db = linear_activation_backward(dA, cache, "relu")
'''

# Backward propagation through the whole network
def deep_backward(AL, Y, caches):

    # AL -- vector of predictions from the forward pass
    # Y -- vector of true labels

    # grads -- stores dA, dW, db for every layer
    grads = {}

    # Depth of the network (number of parameterized layers)
    L = len(caches)

    # Make Y and AL the same shape
    Y = Y.reshape(AL.shape)

    # Gradient of the cost w.r.t. AL (seeds the backward pass)
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: sigmoid backward
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

    # Hidden layers: ReLU backward; record every gradient for the parameter update
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        grads["dA" + str(i + 1)], grads["dW" + str(i + 1)], grads["db" + str(i + 1)] = linear_activation_backward(grads["dA" + str(i + 2)], current_cache, "relu")

    return grads

# Test code
'''
np.random.seed(3)
AL = np.random.randn(1, 2)
Y = np.array([[1, 0]])
A1 = np.random.randn(4,2)
W1 = np.random.randn(3,4)
b1 = np.random.randn(3,1)
Z1 = np.random.randn(3,2)
linear_cache_activation_1 = ((A1, W1, b1), Z1)
A2 = np.random.randn(3,2)
W2 = np.random.randn(1,3)
b2 = np.random.randn(1,1)
Z2 = np.random.randn(1,2)
linear_cache_activation_2 = ((A2, W2, b2), Z2)
caches = (linear_cache_activation_1, linear_cache_activation_2)
grads = deep_backward(AL, Y, caches)
'''

# Parameter update (one step of gradient descent)
def update_parameters(parameters, grads, learning_rate):

    # learning_rate -- gradient-descent step size

    L = len(parameters) // 2    # number of layers in the deep network

    # Loop over parameters, updating every W and b
    for i in range(L):
        parameters["W" + str(i + 1)] = parameters["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        parameters["b" + str(i + 1)] = parameters["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]

    return parameters

# End-to-end propagation test code
'''
X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
print("parameters:{}".format(parameters))
AL, caches = deep_forward(X, parameters)
Y = np.array([[1]])
learning_rate = 0.05
grads = deep_backward(AL, Y, caches)
parameters_update = update_parameters(parameters, grads, learning_rate)
print("parameters_update:{}".format(parameters_update))
'''

# ------------------------ Application ------------------------

# Load the cat / non-cat datasets
def load_dataset():
    train_dataset = h5py.File('train_catvnoncat.h5', "r")

    # Training-set pixel arrays
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])

    # Training-set labels
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('test_catvnoncat.h5', "r")

    # Test-set pixel arrays
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])

    # Test-set labels
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    # List of class names
    classes = np.array(test_dataset["list_classes"][:])

    # Reshape the labels into row vectors
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

# Predict with the trained network
def predict(X, y, parameters):

    # X -- input data
    # y -- vector of true labels

    # Number of examples
    number = X.shape[1]

    p = np.zeros((1, number))

    AL, caches = deep_forward(X, parameters)

    # Threshold each prediction at 0.5:
    for i in range(0, AL.shape[1]):
        if AL[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0

    # Fraction of predictions that match the labels
    print("Accuracy: " + str(np.sum(p == y) / number))

    return p

def L_layer_model(X, Y, layer, iterations = 1000, learning_rate = 0.0075):
    costs = []

    parameters = initialize_parameters(layer)

    # One full pass per iteration: forward, cost, backward, update
    for i in range(0, iterations):
        AL, caches = deep_forward(X, parameters)

        cost = compute_cost(AL, Y)

        grads = deep_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        # Record the cost every 100 iterations
        if i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    return parameters
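# A minimal smoke test (an addition, not from the original post): fit a few
# random examples with made-up 0/1 labels just to confirm the cost falls
# before training on the real dataset below.
'''
np.random.seed(1)
X_demo = np.random.randn(5, 10)             # 10 examples, 5 features each
Y_demo = (np.random.rand(1, 10) > 0.5) * 1  # arbitrary binary labels
demo_parameters = L_layer_model(X_demo, Y_demo, [5, 4, 1], iterations = 300, learning_rate = 0.05)
'''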

# --------- main ---------

train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

# Dataset dimensions
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

# Print dataset info
print("Number of training examples:" + str(m_train))
print("Number of testing examples:" + str(m_test))
print("Each image is of size:(" + str(num_px) + "," + str(num_px) + ",3)")
print("train_x_orig shape:" + str(train_x_orig.shape))
print("train_y shape:" + str(train_y.shape))
print("test_x_orig shape:" + str(test_x_orig.shape))
print("test_y shape:" + str(test_y.shape))

# Flatten each image into a column vector (train and test sets)
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Normalize the pixel values into the 0-1 range
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255

print("train_x's shape:" + str(train_x.shape))
print("test_x's shape:" + str(test_x.shape))

# Layer sizes of the network (12288 = num_px * num_px * 3 input features)
layer = [12288, 20, 7, 5, 1]

parameters = L_layer_model(train_x, train_y, layer , iterations = 2500)

# Accuracy on the training set
print("train:")
predictions_train = predict(train_x,train_y,parameters)

# Accuracy on the test set
print("test:")
predictions_test = predict(test_x,test_y,parameters)
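
# The variable `image` used below is never defined in the original post; the
# author presumably loaded and resized a picture beforehand. A sketch of that
# missing step (an assumption, not the original code), using Pillow and a
# hypothetical file name:
'''
from PIL import Image

img = Image.open("my_image.jpg").resize((num_px, num_px))  # hypothetical file
image = np.array(img) / 255.  # normalize the same way as the training data
'''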

# Try the trained model on a single image
my_image = image.reshape([-1, 1])

# True label of the image [0 or 1]
my_label_y = [0]

# Predict and print the result
print("my_image:")
my_predicted_image = predict(my_image, my_label_y, parameters)
print("result:")
print("y = " + str(np.squeeze(my_predicted_image)) + " " + classes[int(np.squeeze(my_predicted_image))].decode("utf-8"))

