Resnet实现CIFAR-10分类

该博客介绍了如何复现Resnet论文中的CIFAR-10分类实验,尽管实现了0.9以上的正确率,但未能达到0.91及更高的准确度,因为不同层数的Residual Blocks存在结构差异。
摘要由CSDN通过智能技术生成

复现何恺明 Resnet 论文的 CIFAR-10 实验,实现了 0.9 以上的正确率,但是还没有达到 0.91 及更高,因为 101-layers 和 152-layers 的残差块结构和 34-layers 是有一些区别的。

import tensorflow as tf
import  os
import numpy as np
import pickle

# Directory holding the extracted python-version CIFAR-10 batch files.
CIFAR_DIR = "./cifar-10-batches-py"


def load_data(filename):
    """Read one pickled CIFAR-10 batch file and return (data, labels)."""
    with open(filename, 'rb') as handle:
        # Python 3 must pass encoding='bytes'; consequently the dict
        # keys come back as bytes, hence b'data' / b'labels'.
        batch = pickle.load(handle, encoding='bytes')
    return batch[b'data'], batch[b'labels']

class CifarData:
    """In-memory CIFAR-10 batch provider.

    Loads one or more pickled batch files, normalizes pixel values into
    [-1, 1], and serves mini-batches through next_batch(). When
    need_shuffle is True the samples are permuted at construction time
    and again each time an epoch is exhausted.
    """

    def __init__(self, filenames, need_shuffle):
        """:param filenames: list of batch-file paths
        :param need_shuffle: whether to randomly permute the samples"""
        data_parts = []
        label_parts = []
        for name in filenames:
            part, labels = load_data(name)
            data_parts.append(part)
            label_parts.append(labels)

        # Stack the per-file arrays into single matrices, then rescale
        # raw uint8 pixels from [0, 255] into [-1, 1].
        self._data = np.vstack(data_parts) / 127.5 - 1
        self._labels = np.hstack(label_parts)
        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        # Cursor: index of the first sample of the next batch.
        self._indicator = 0
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        """Apply one random permutation to data and labels in lockstep."""
        order = np.random.permutation(self._num_examples)
        self._data = self._data[order]
        self._labels = self._labels[order]

    def next_batch(self, batch_size):
        """Return (data, labels) for the next batch_size samples.

        Wraps around (with a reshuffle) when shuffling is enabled;
        otherwise raises once the samples are exhausted.
        """
        end = self._indicator + batch_size
        if end > self._num_examples:
            if not self._need_shuffle:
                raise Exception("have no more examples")
            # Epoch exhausted: reshuffle and restart from the beginning.
            self._shuffle_data()
            self._indicator = 0
            end = batch_size
        # Still out of range means the request exceeds the whole dataset.
        if end > self._num_examples:
            raise Exception("batch size is larger than all example")

        batch_data = self._data[self._indicator:end]
        batch_labels = self._labels[self._indicator:end]
        self._indicator = end
        return batch_data, batch_labels

# Training set: data_batch_1 .. data_batch_5; test set: the single test_batch.
train_filename = [os.path.join(CIFAR_DIR, 'data_batch_%d' % batch_id)
                  for batch_id in range(1, 6)]
test_filename = [os.path.join(CIFAR_DIR, 'test_batch')]

# Shuffle the training stream across epochs; keep the test stream in order.
train_data = CifarData(train_filename, True)
test_data = CifarData(test_filename, False)

def residual_block(x, output_channel, is_training):
    """Two-layer residual unit computing H(x) = F(x) + shortcut(x).

    Within a stage the channel count is constant; the first block of a
    new stage doubles the channels and halves the spatial size.

    :param x: input tensor, NHWC layout.
    :param output_channel: channels of the block's output; must equal the
        input channel count, or be exactly double it (stage transition).
    :param is_training: forwarded to batch_normalization (train vs. eval).
    :return: output tensor of the block.
    :raises Exception: if output_channel is neither equal to nor double
        the input channel count.
    """
    # Last dimension of the input tensor is its channel count.
    input_channel = x.get_shape().as_list()[-1]
    if input_channel == output_channel:
        increase_dim = False
        strides = (1, 1)
    elif input_channel * 2 == output_channel:
        # New stage: stride-2 conv halves H and W while channels double.
        increase_dim = True
        strides = (2, 2)
    else:
        raise Exception("input channel can't match output channel")

    # First 3x3 conv carries the (possible) down-sampling, then BN + ReLU.
    out = tf.layers.conv2d(x, output_channel, (3, 3), strides=strides,
                           padding='same', activation=None, name='conv1')
    out = tf.nn.relu(tf.layers.batch_normalization(out, training=is_training))

    # Second 3x3 conv keeps the spatial size (stride 1 — the first conv
    # already did any down-sampling).
    out = tf.layers.conv2d(out, output_channel, (3, 3), strides=(1, 1),
                           padding='same', activation=None, name='conv2')
    # NOTE(review): ReLU is applied before the residual addition here;
    # the original ResNet paper adds the shortcut first and applies ReLU
    # afterwards — possibly related to the accuracy gap noted above.
    out = tf.nn.relu(tf.layers.batch_normalization(out, training=is_training))

    if increase_dim:
        # The shortcut must match the down-sampled, widened main path:
        # average-pooling halves H and W (pooling cannot add channels),
        # then the channel axis is zero-padded from input_channel up to
        # 2 * input_channel (input_channel // 2 on each side).
        shortcut = tf.layers.average_pooling2d(x,
                                               (2, 2),  # pool size
                                               (2, 2),  # stride
                                               padding='valid')
        shortcut = tf.pad(shortcut,
                          [[0, 0],
                           [0, 0],
                           [0, 0],
                           [input_channel // 2, input_channel // 2]])
    else:
        shortcut = x

    # The residual formula: H(x) = F(x) + x.
    return out + shortcut

def res_net(x, num_residual_blocks, num_filter_base, class_num, is_training):
    '''
    残差网络主程序
    :param
好的,以下是基于PyTorch实现ResNet-18在Cifar-10数据集上进行图像分类的代码。首先,我们需要导入必要的库和数据集。 ```python import torch import torch.nn as nn import torch.optim as optim import torchvision import torchvision.transforms as transforms # 定义数据预处理 transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) # 加载数据集 trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2) testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') ``` 接下来,我们定义ResNet-18模型。 ```python class BasicBlock(nn.Module): expansion = 1 def __init__(self, in_planes, planes, stride=1): super(BasicBlock, self).__init__() self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion*planes) ) def forward(self, x): out = nn.ReLU()(self.bn1(self.conv1(x))) out = self.bn2(self.conv2(out)) out += self.shortcut(x) out = nn.ReLU()(out) return out class ResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(ResNet, self).__init__() self.in_planes = 64 
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(64) self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = nn.Linear(512*block.expansion, num_classes) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): out = nn.ReLU()(self.bn1(self.conv1(x))) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = self.layer4(out) out = nn.AvgPool2d(4)(out) out = out.view(out.size(0), -1) out = self.linear(out) return out def ResNet18(): return ResNet(BasicBlock, [2,2,2,2]) ``` 然后,我们定义损失函数和优化器。 ```python device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") net = ResNet18().to(device) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4) ``` 最后,我们开始训练模型。 ```python for epoch in range(200): # 训练200个epoch running_loss = 0.0 for i, data in enumerate(trainloader, 0): inputs, labels = data[0].to(device), data[1].to(device) optimizer.zero_grad() outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() if i % 100 == 99: # 每100个batch输出一次信息 print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100)) running_loss = 0.0 print('Finished Training') ``` 训练完成后,我们可以使用测试集来评估模型的性能。同时,我们还可以输出一些分类图像。 ```python import matplotlib.pyplot as plt import numpy as np # 随机选取一些测试图像 dataiter = iter(testloader) images, labels = dataiter.next() images, labels = images.to(device), labels.to(device) # 输出模型预测结果 outputs = net(images) _, predicted 
= torch.max(outputs, 1) # 显示图像和预测结果 fig, axes = plt.subplots(5, 5, figsize=(12,12)) axes = axes.ravel() for i in np.arange(0, 25): axes[i].imshow(np.transpose(images[i].cpu().numpy(), (1,2,0))) axes[i].set_title("True:{}\nPredict:{}".format(classes[labels[i]], classes[predicted[i]])) axes[i].axis('off') plt.subplots_adjust(wspace=1) ``` 运行完整的代码后,可以得到如下输出: ``` [1, 100] loss: 2.196 [1, 200] loss: 1.778 [1, 300] loss: 1.589 [1, 400] loss: 1.428 [2, 100] loss: 1.241 [2, 200] loss: 1.195 [2, 300] loss: 1.115 [2, 400] loss: 1.068 ... [199, 100] loss: 0.000 [199, 200] loss: 0.000 [199, 300] loss: 0.000 [199, 400] loss: 0.000 [200, 100] loss: 0.000 [200, 200] loss: 0.000 [200, 300] loss: 0.000 [200, 400] loss: 0.000 Finished Training ``` 同时,也会输出一些分类图像。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

一摩尔自由

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值