Programming Assignment for Week 2 of Andrew Ng's Deep Learning Course

This post reproduces the Week 2 programming assignment from Andrew Ng's deep learning course, implementing a logistic regression model from scratch with Python and NumPy. It walks through the full pipeline: loading the data, preprocessing, initializing parameters, implementing the activation function, forward and backward propagation, gradient descent optimization, the prediction function, and model evaluation.


Original article: https://blog.csdn.net/u013733326/article/details/79639509

Following the article linked above, this post reproduces the Week 2 programming assignment from Andrew Ng's deep learning course.

Updated July 30: added more detailed comments.

#!/usr/bin/env python
# coding: utf-8

# In[3]:


import numpy as np
import h5py
import matplotlib.pyplot as plt
# Print NumPy arrays in full instead of truncating long output
np.set_printoptions(threshold=np.inf)


def load_dataset():
    """
    Load the dataset stored in HDF5 (.h5) files.

    An h5py file is a container for two kinds of objects: datasets and groups.
    Dataset: an array-like collection of data.
    Group: a dict-like container with keys and values; a group can hold
    other groups as well as datasets.
    """
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    # print([keys for keys in train_dataset.keys()])
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])   # training images
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])   # training labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])    # test images
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])    # test labels

    classes = np.array(test_dataset["list_classes"][:])
    # Reshape the label vectors into row vectors of shape (1, m)
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

index = 25
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
# plt.imshow(train_set_x_orig[index])
# print("y=" + str(train_set_y[:,index]) + ", it's a " + classes[np.squeeze(train_set_y[:,index])].decode("utf-8") + " picture")

# print(train_set_x_orig.shape)  # NHWC

m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]

N_train = train_set_x_orig.shape[0]
H_train = train_set_x_orig.shape[1]
W_train = train_set_x_orig.shape[2]
C_train = train_set_x_orig.shape[3]

print("训练集的数量: m_train = " + str(m_train))
print("测试集的数量: m_test = " + str(m_test))
print("image: H = " + str(H_train) + ", W = " + str(W_train) + ", C = " + str(C_train))
print("训练集_图片的维度: " + str(train_set_x_orig.shape))
print("训练集_标签的维度: " + str(train_set_y.shape))
print("测试集_图片的维度: " + str(test_set_x_orig.shape))
print("测试集_标签的维度: " + str(test_set_y.shape))

# Flatten and transpose the training and test images, giving shape (num_px * num_px * 3, m)
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

print("训练集降维之后的维度: " + str(train_set_x_flatten.shape))
print("测试集降维之后的维度: " + str(test_set_x_flatten.shape))

# Normalize the pixel values from [0, 255] to [0, 1]
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
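
# Shape sanity check (illustrative only; demo_batch is a made-up stand-in,
# not the cat dataset): reshape(m, -1).T puts one flattened example per
# column, which is the layout every function below expects.
demo_batch = np.arange(4 * 2 * 2 * 3).reshape(4, 2, 2, 3)  # 4 tiny fake NHWC "images"
demo_flat = demo_batch.reshape(demo_batch.shape[0], -1).T
print(demo_flat.shape)  # (12, 4)
print(np.array_equal(demo_flat[:, 1], demo_batch[1].ravel()))  # True: column i == flattened example i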

# Sigmoid activation function
def sigmoid(z):
    # sigmoid(z) = 1 / (1 + exp(-z))
    s = 1 / (1 + np.exp(-z))
    return s
# print("==========sigmoid test=============")
# print("sigmoid(0) = " + str(sigmoid(0)))
# print("==========done ====================")

# Initialize the weights and bias
def initialize_with_zeros(dim):
    """
    Initialize the parameters to zero.
        w - zero vector of shape (dim, 1)
        b - scalar bias, initialized to 0
    """
    w = np.zeros(shape=(dim, 1))
    b = 0

    # Use assertions to catch malformed data early
    assert(w.shape == (dim, 1))
    # isinstance checks whether an object is an instance of a given type
    assert(isinstance(b, float) or isinstance(b, int))
    
    return (w, b)
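
# Quick check of the initializer (dim = 2 is an arbitrary toy size):
w_demo, b_demo = initialize_with_zeros(2)
print("w_demo = " + str(w_demo))  # [[0.] [0.]]
print("b_demo = " + str(b_demo))  # 0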


def propagate(w, b, X, Y):
    """
        实现前向传播和后向传播的成本函数及其梯度(dw和db)
        参数:
            w - 权重
            b - 偏差
            X - 训练数据
            Y - 标签
        返回:
            cost - 逻辑回归的负对数似然成本,即成本函数
            dw - 相对于w的损失梯度
            db - 相对于b的损失梯度
    """
    m = X.shape[1]
    
    # Forward propagation
    A = sigmoid(np.dot(w.T, X) + b)  # activations, shape (1, m)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A)))  # cross-entropy cost

    # Backward propagation
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    
    
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    
    grads = { "dw": dw, "db": db}
    return (grads, cost)
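
# Sanity check for propagate() on tiny made-up values, in the spirit of the
# course's own unit tests (the *_t names and numbers here are illustrative):
w_t = np.array([[1.], [2.]])
b_t = 2.0
X_t = np.array([[1., 2.], [3., 4.]])
Y_t = np.array([[1, 0]])
grads_t, cost_t = propagate(w_t, b_t, X_t, Y_t)
print("dw = " + str(grads_t["dw"]))  # approx. [[0.99993216] [1.99980262]]
print("db = " + str(grads_t["db"]))  # approx. 0.49993523
print("cost = " + str(cost_t))       # approx. 6.000065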

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """
        此函数通过设置迭代次数,运行梯度下降算法来优化w和b
        参数:
            w - 权重数据
            b - 偏差
            X - 训练数据
            Y - 标签
            num_iterations - 迭代次数
            learning_rate - 梯度下降更新规则的学习率
            print_cost - 每100步打印一次损失值
        返回:
            params - 包含权重w和偏移b的字典
            grads - 包含权重和偏移相对于成本函数的梯度字典
            成本 - 优化期间所有的成本列表,将用于绘制学习曲线
    """
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        
        w = w - learning_rate * dw
        b = b - learning_rate * db
        
        # Record the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
        if (print_cost) and (i % 100 == 0):
            print("Cost after iteration %i: %f" % (i, cost))
            
    params = {
        "w" : w,
        "b" : b
    }
    
    grads = {
        "dw" : dw,
        "db" : db
    }
    return (params, grads, costs)
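
# Running a short optimization on the same toy data from the propagate()
# check above; the iteration count and learning rate here are arbitrary:
params_t, grads_t, costs_t = optimize(w_t, b_t, X_t, Y_t, num_iterations=100, learning_rate=0.009, print_cost=False)
print("w = " + str(params_t["w"]))
print("b = " + str(params_t["b"]))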

def predict(w, b, X):
    """
        使用逻辑回归函数预测标签
    
        参数:
            w - 权重
            b - 偏移
            X - 输入数据
    """
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)
    # 预测是猫的概率,得到的是整个集合的结果,A是二维列表
    A = sigmoid(np.dot(w.T, X) + b)
    
    # 阈值设为0.5
    for i in range(A.shape[1]):
        Y_prediction[0,i] = 1 if A[0,i] > 0.5 else 0
    assert(Y_prediction.shape == (1,m))
    
    return Y_prediction
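
# With the toy parameters from the optimize() check, predict() should return
# a (1, m) array of 0/1 labels:
print("predictions = " + str(predict(params_t["w"], params_t["b"], X_t)))
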
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """
        调用函数,构建逻辑回归模型
        
        参数:
            X_train - np数组,维度为(num_px * num_py * 3, m_train)的训练集
            Y_train - np数组,维度为(1, m_train)的训练集标签
            X_test - np数组,维度为(num_px * num_py * 3, m_test)的测试集
            Y_test - np数组,维度为(1,m_test)的测试集标签
            num_iterations - 用于优化参数的迭代次数的超参数
            learning_rate - 表示optimize()更新规则中使用的学习率超参
            print_cost - 设置每100次迭代打印一次
            
        返回:
            d - 包含有关模型信息的字典
    """
    w, b = initialize_with_zeros(X_train.shape[0])
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    
    # Retrieve the learned parameters w and b
    w, b = parameters["w"], parameters["b"]

    # Predict on the test and training examples
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Print the accuracy after training. Predictions and labels are both 0/1,
    # so np.abs(prediction - Y) is 1 exactly on the misclassified examples
    # and np.mean() of it is the error rate.
    print("train accuracy: ", format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100), "%")
    print("test accuracy: ", format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100), "%")
    
    d = {
        "costs" : costs,
        "Y_prediction_train" : Y_prediction_train,
        "Y_prediction_test" : Y_prediction_test,
        "w" : w,
        "b" : b,
        "learning_rate" : learning_rate,
        "num_iterations" : num_iterations
    }
    return d
print("=============model test===============")
# d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.001, print_cost = True)
# costs = np.squeeze(d['costs'])
# # plt.plot(costs)
# # plt.ylabel('cost')
# # plt.xlabel('iterations(per hundreds)')
# # plt.title("Learning rate = " + str(d["learning_rate"]))
# # plt.show()

learning_rates = [0.01, 0.001, 0.0001]
models = {}
for i in learning_rates:
    print("learning rate is: " + str(i))
    models[i] = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = i, print_cost = True)
    print('\n' + "-------------------------------" + '\n')

# Plot the cost curve for each learning rate, labeled by its value
for i in learning_rates:
    plt.plot(np.squeeze(models[i]['costs']), label = str(models[i]["learning_rate"]))

legend = plt.legend(loc='upper right', shadow=False)
# Get the legend's frame
frame = legend.get_frame()

# Make the legend background transparent
frame.set_facecolor('none')
plt.show()
    





