原文链接:https://blog.csdn.net/u013733326/article/details/79639509
参考大佬的文章,复现吴恩达深度学习课程第二章的编程作业
7.30日更新,加一些详细点的注释
#!/usr/bin/env python
# coding: utf-8
# In[3]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
# 用来设置将numpy数组中的数据完全打印显示
np.set_printoptions(threshold=np.inf)
def load_dataset():
"""
该函数的作用是读取保存在h5文件中的数据集
h5py是存放两类对象的容器,数据集(dataset)和组(group)
数据集:类似数组类的数据集合
组:类似python,有键和值,组中可以存放group也可以存放dataset
"""
train_dataset = h5py.File('datasets/train_catvnoncat.h5',"r")
# print([keys for keys in train_dataset.keys()])
train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # 训练集图像
train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # 训练集标签
test_dataset = h5py.File('datasets/test_catvnoncat.h5',"r")
test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # 测试集图像
test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # 测试集标签
classes = np.array(test_dataset["list_classes"][:])
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
index = 25
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
# plt.imshow(train_set_x_orig[index])
# print("y=" + str(train_set_y[:,index]) + ", it's a " + classes[np.squeeze(train_set_y[:,index])].decode("utf-8") + " picture")
# print(train_set_x_orig.shape) # NHWC
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]
N_train = train_set_x_orig.shape[0]
H_train = train_set_x_orig.shape[1]
W_train = train_set_x_orig.shape[2]
C_train = train_set_x_orig.shape[3]
print("训练集的数量: m_train = " + str(m_train))
print("测试集的数量: m_test = " + str(m_test))
print("image: H = " + str(H_train) + ", W = " + str(W_train) + ", C = " + str(C_train))
print("训练集_图片的维度: " + str(train_set_x_orig.shape))
print("训练集_标签的维度: " + str(train_set_y.shape))
print("测试集_图片的维度: " + str(test_set_x_orig.shape))
print("测试集_标签的维度: " + str(test_set_y.shape))
# 对训练数据集和测试数据集进行降维并转置
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print("训练集降维之后的维度: " + str(train_set_x_flatten.shape))
print("测试集降维之后的维度: " + str(test_set_x_flatten.shape))
# 归一化
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
# sigmoid激活函数实现
def sigmoid(z):
# sigmoid
s = 1 / (1 + np.exp(-z))
return s;
# print("==========sigmoid test=============");
# print("sigmoid(0) = " + str(sigmoid(0)));
# print("==========done ====================");
# 初始化权重数据和bias
def initialize_with_zeros(dim):
"""
初始化为0
w - 初始化为(dim, 1)
b - 初始标量
"""
w = np.zeros(shape=(dim,1))
b = 0
# 使用断言保证数据正确
assert(w.shape == (dim, 1))
# isinstance 用来判断两个类型是否相同
assert(isinstance(b, float) or isinstance(b, int))
return (w, b)
def propagate(w, b, X, Y):
"""
实现前向传播和后向传播的成本函数及其梯度(dw和db)
参数:
w - 权重
b - 偏差
X - 训练数据
Y - 标签
返回:
cost - 逻辑回归的负对数似然成本,即成本函数
dw - 相对于w的损失梯度
db - 相对于b的损失梯度
"""
m = X.shape[1]
# 正向传播
# np.dot(w.T, X)
A = sigmoid(np.dot(w.T, X) + b) # 计算激活值
cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A))) # 成本函数
# 反向传播
dw = (1 / m) * np.dot(X, (A - Y).T)
db = (1 / m) * np.sum(A - Y)
assert(dw.shape == w.shape)
assert(db.dtype == float)
cost = np.squeeze(cost)
assert(cost.shape == ())
grads = { "dw": dw, "db": db}
return (grads, cost)
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
"""
此函数通过设置迭代次数,运行梯度下降算法来优化w和b
参数:
w - 权重数据
b - 偏差
X - 训练数据
Y - 标签
num_iterations - 迭代次数
learning_rate - 梯度下降更新规则的学习率
print_cost - 每100步打印一次损失值
返回:
params - 包含权重w和偏移b的字典
grads - 包含权重和偏移相对于成本函数的梯度字典
成本 - 优化期间所有的成本列表,将用于绘制学习曲线
"""
costs = []
for i in range(num_iterations):
grads, cost = propagate(w, b, X, Y)
dw = grads["dw"]
db = grads["db"]
w = w - learning_rate * dw
b = b - learning_rate * db
# 记录成本
if i % 100 == 0:
costs.append(cost)
if (print_cost) and (i % 100 == 0):
print("迭代次数: %i, 误差值: %f"%(i, cost))
params = {
"w" : w,
"b" : b
}
grads = {
"dw" : dw,
"db" : db
}
return (params, grads, costs)
def predict(w, b, X):
"""
使用逻辑回归函数预测标签
参数:
w - 权重
b - 偏移
X - 输入数据
"""
m = X.shape[1]
Y_prediction = np.zeros((1,m))
w = w.reshape(X.shape[0], 1)
# 预测是猫的概率,得到的是整个集合的结果,A是二维列表
A = sigmoid(np.dot(w.T, X) + b)
# 阈值设为0.5
for i in range(A.shape[1]):
Y_prediction[0,i] = 1 if A[0,i] > 0.5 else 0
assert(Y_prediction.shape == (1,m))
return Y_prediction
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
"""
调用函数,构建逻辑回归模型
参数:
X_train - np数组,维度为(num_px * num_py * 3, m_train)的训练集
Y_train - np数组,维度为(1, m_train)的训练集标签
X_test - np数组,维度为(num_px * num_py * 3, m_test)的测试集
Y_test - np数组,维度为(1,m_test)的测试集标签
num_iterations - 用于优化参数的迭代次数的超参数
learning_rate - 表示optimize()更新规则中使用的学习率超参
print_cost - 设置每100次迭代打印一次
返回:
d - 包含有关模型信息的字典
"""
w, b = initialize_with_zeros(X_train.shape[0])
parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
# 获取参数w和b
w, b = parameters["w"], parameters["b"]
# 预测实例
Y_prediction_test = predict(w, b, X_test)
Y_prediction_train = predict(w, b, X_train)
# 打印训练后的准确性
"""
np.mean() 求平均值
np.abs() 求绝对值
format() 格式化字符串
"""
print("训练集准确性: ", format(100 - np.mean(np.abs(Y_prediction_train - Y_train))*100),"%")
print("测试集准确性: ", format(100 - np.mean(np.abs(Y_prediction_test - Y_test))*100),"%")
d = {
"costs" : costs,
"Y_prediction_train" : Y_prediction_train,
"Y_prediction_test" : Y_prediction_test,
"w" : w,
"b" : b,
"learning_rate" : learning_rate,
"num_iterations" : num_iterations
}
return d
print("=============model test===============")
# d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.001, print_cost = True)
# costs = np.squeeze(d['costs'])
# # plt.plot(costs)
# # plt.ylabel('cost')
# # plt.xlabel('iterations(per hundreds)')
# # plt.title("Learning rate = " + str(d["learning_rate"]))
# # plt.show()
learning_rates = [0.01, 0.001, 0.0001]
models = {}
for i in learning_rates:
print("learning rate is: " + str(i))
models[i] = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = i, print_cost = True)
print('\n' + "-------------------------------" + '\n')
# 画折线,设置label
for i in learning_rates:
plt.plot(np.squeeze(models[i]['costs']), label = str(models[i]["learning_rate"]))
legend = plt.legend(loc = 'upper right', shadow = False)
# 添加frame
frame = legend.get_frame()
# frame背景色为透明
frame.set_facecolor('none')
plt.show()
本文复现了吴恩达深度学习课程的第二章编程作业,使用Python和NumPy从头开始实现了一个逻辑回归模型。文章详细介绍了数据加载、预处理、初始化参数、实现激活函数、前向和后向传播、梯度下降优化、预测函数及模型评估的全过程。

316

被折叠的 条评论
为什么被折叠?



