Implementing cat recognition in Python

 

I think the hardest part is computing the values of dw and db (that is, the forward and backward passes):

The most bug-prone part is keeping track of the shape of every matrix. While coding, stay clear-headed: it is best to write out all of the matrix shapes first, and then the code falls out of the formulas directly.
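For reference, these are the forward- and backward-pass formulas being implemented below (they are the standard logistic-regression gradients; the shape annotations are mine, with m = 209 training examples):

$$A = \sigma(wX + b), \qquad dZ = A - Y$$
$$dW = \tfrac{1}{m}\, dZ\, X^{\top}, \qquad db = \tfrac{1}{m}\sum_{i=1}^{m} dZ^{(i)}$$

Here w has shape (1, 12288), X has shape (12288, m), A and dZ have shape (1, m), dW has shape (1, 12288), and db is a scalar.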

def propagate(w, b, x, y):
    '''
    :param w: shape (1, 12288)
    :param b: scalar bias
    :param x: shape (12288, 209)
    :param y: shape (1, 209)
    A.shape (1, 209)
    dw.shape (1, 12288)
    db: scalar
    :return: dict containing the gradients dw and db
    '''
    # print('propagate:', w.shape, x.shape, y.shape)
    # one forward and backward pass over the whole training set
    # (a single input layer and a single output unit)

    # forward pass
    A = Sigmoid(np.dot(w, x) + b)

    # backward pass
    dz = A - y
    dw = (1/n_train) * np.dot(dz, x.T)
    db = (1/n_train) * np.sum(dz)
    assert (dw.shape == w.shape)

    # pack the gradients into a dict
    Dict = {
        'dw': dw,
        'db': db
        # 'loss': Loss
    }
    return Dict
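A quick shape check on toy data catches most of these shape bugs early. This is my own sketch, not part of the original post; it assumes the propagate function above plus numpy (np) and Sigmoid from the full program below, and it sets the global n_train because propagate divides by that global rather than by x.shape[1]:

import numpy as np

w, b = np.zeros((1, 4)), 0      # 4 input features
x = np.random.rand(4, 3)        # 3 toy examples, one per column
y = np.array([[1, 0, 1]])
n_train = 3                     # propagate divides by this global
grads = propagate(w, b, x, y)
print(grads['dw'].shape)        # (1, 4), same shape as w
print(float(grads['db']))       # a single scalar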

 

Here is the complete program:

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from lr_utils import load_dataset


# load the raw dataset
train_x, train_y, test_x, test_y, classes = load_dataset()
'''
train_x.shape (209, 64, 64, 3)
train_y.shape (1, 209)
test_x.shape (50, 64, 64, 3)
test_y.shape (1, 50)
'''


# number of training examples, number of test examples, and pixels along each image side
n_train = train_x.shape[0]
n_test = test_x.shape[0]
num_pix = train_x.shape[1]
'''
n_train = 209
n_test = 50
num_pix = 64
'''

# flatten the training and test sets and transpose (one image per column)
train_x = train_x.reshape(n_train, -1).T
test_x = test_x.reshape(n_test, -1).T
'''
train_x.shape (12288, 209)
test_x.shape (12288, 50)
'''

# normalize pixel values to [0, 1]
train_x = train_x / 255
test_x = test_x / 255

# sigmoid activation function
def Sigmoid(z):
    return 1 / (1 + np.exp(-z))


def initial(size):
    '''
    w.shape (1, size), i.e. (1, 12288)
    b: scalar
    :param size: number of input features
    :return: zero-initialized weights w and bias b
    '''
    # initialize the weights w and the bias b
    # w.shape = [number of outputs, number of inputs]
    w = np.zeros((1, size))
    b = 0
    return w, b


def propagate(w, b, x, y):
    '''
    :param w: shape (1, 12288)
    :param b: scalar bias
    :param x: shape (12288, 209)
    :param y: shape (1, 209)
    A.shape (1, 209)
    Loss: scalar
    dw.shape (1, 12288)
    db: scalar
    :return: dict containing the gradients dw and db
    '''
    # print('propagate:', w.shape, x.shape, y.shape)
    # one forward and backward pass over the whole training set
    # (a single input layer and a single output unit)

    # forward pass
    A = Sigmoid(np.dot(w, x) + b)
    # Loss = (-1 / n_train) * np.sum(y * np.log(A) + (1 - y) * np.log(1 - A))  # cross-entropy cost; see formulas 3 and 4 in the course

    # backward pass
    dz = A - y
    dw = (1/n_train) * np.dot(dz, x.T)
    db = (1/n_train) * np.sum(dz)
    assert (dw.shape == w.shape)
    # gradient formulas as written in the course videos (these assume a column-vector w):
    # dw = (1 / n_train) * np.dot(x, (A - y).T)
    # db = (1 / n_train) * np.sum(A - y)

    # pack the gradients into a dict
    Dict = {
        'dw': dw,
        'db': db
        # 'loss': Loss
    }
    return Dict


def optimize(w, b, x, y, n_iter, learningRate):
    '''
    :param w: shape (1, 12288)
    :param b: scalar bias
    :param x: shape (12288, 209)
    :param y: shape (1, 209)
    :param n_iter: number of gradient-descent iterations
    :param learningRate: step size for gradient descent
    :return: dict containing the trained w and b
    '''
    # gradient-descent loop
    for i in range(n_iter):
        Dict = propagate(w, b, x, y)
        dw = Dict['dw']
        db = Dict['db']
        w = w - dw * learningRate
        b = b - db * learningRate
        if i % 500 == 0:
            print(i)
    # save the trained w and b
    Dict = {
        'w': w,
        'b': b
    }
    return Dict


def predict(w, b, x):
    '''
    :param w: shape (1, 12288)
    :param b: scalar bias
    :param x: shape (12288, n), one image per column
    A.shape (1, n)
    :return: predicted probability that each image is a cat
    '''
    # forward pass only
    A = Sigmoid(np.dot(w, x) + b)
    return A





w, b = initial(num_pix * num_pix * 3)
n_iter, learningRate = [5000, 0.005]
Dict = optimize(w, b, train_x, train_y, n_iter, learningRate)
w = Dict['w']
b = Dict['b']
root1 = './Image/t6.png' # cat
root2 = './Image/t10.png' # dog
root3 = './Image/t9.png' # deer
Input = Image.open(root2)
Input = Input.convert("RGB").resize((64, 64))
InputArray = np.array(Input).reshape(-1, 1)
# normalize the same way as the training data
InputArray = InputArray / 255
Output = predict(w, b, InputArray)
print(Output)
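As a sanity check, the accuracy on the course training and test sets can also be printed at the end of the script. This is my own addition, not part of the original program; it reuses predict and the variables defined above, thresholding the predicted probabilities at 0.5:

# classification accuracy on the course data
train_pred = (predict(w, b, train_x) > 0.5).astype(int)
test_pred = (predict(w, b, test_x) > 0.5).astype(int)
print('train accuracy: %.1f%%' % (100 - np.mean(np.abs(train_pred - train_y)) * 100))
print('test accuracy:  %.1f%%' % (100 - np.mean(np.abs(test_pred - test_y)) * 100))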

The output is the probability that the image is a cat. It is not very accurate, probably because there is too little data: only 209 training images.
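One way to tell whether the model has actually converged (as opposed to simply lacking data) is to re-enable the commented-out cost computation and watch it fall during training. Below is a minimal sketch of that same cross-entropy cost, evaluated once with the trained parameters; it is my own addition, using the variable names from the program above:

# cross-entropy cost on the training set for the trained w and b
A = Sigmoid(np.dot(w, train_x) + b)
A = np.clip(A, 1e-8, 1 - 1e-8)   # avoid log(0)
Loss = (-1 / n_train) * np.sum(train_y * np.log(A) + (1 - train_y) * np.log(1 - A))
print('final training cost:', Loss)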

 
