🍨 This article is a members-only complimentary post from the [🔗365天深度学习训练营] (copyright belongs to *K同学啊*)
🍖 Author: [K同学啊]
Preface
Last week we reproduced ResNet (deep residual network), which Kaiming He proposed in 2015. In a later paper he introduced a brand-new residual unit, which we will call ResNet_V2. Building on last week's work, this article implements the ResNet_V2 model in PyTorch.
Last week's article: 深度学习-第J1周:ResNet-50算法的Pytorch实现及解析_quant_day的博客-CSDN博客
1. ResNet50 vs. ResNet_V2: Similarities and Differences
Compare the ResNet50 architecture diagram with the ResNet50_V2 one: the V2 diagram adds the block highlighted in the red box below. It is not hard to see that this block is simply the Conv Block module from ResNet50 with its conv2d replaced by a MaxPool,
so Conv Block and Conv Block_V2 can be written as a single module. IdentityBlock also differs slightly in V2; the code is as follows:
import torch
import torch.nn as nn

class IdentityBlock_V2(nn.Module):
    def __init__(self, in_channel, kl_size, filters):
        super(IdentityBlock_V2, self).__init__()
        filter1, filter2, filter3 = filters
        # pre-activation: BN + ReLU come before each convolution (the V2 change)
        self.bn0 = nn.BatchNorm2d(num_features=in_channel)
        self.cov1 = nn.Conv2d(in_channels=in_channel, out_channels=filter1, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(num_features=filter1)
        self.relu = nn.ReLU(inplace=True)
        self.zeropadding2d1 = nn.ZeroPad2d(1)
        self.cov2 = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=kl_size, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(num_features=filter2)
        self.cov3 = nn.Conv2d(in_channels=filter2, out_channels=filter3, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        identity = self.bn0(x)
        identity = self.relu(identity)
        identity = self.cov1(identity)
        identity = self.bn1(identity)
        identity = self.relu(identity)
        identity = self.zeropadding2d1(identity)
        identity = self.cov2(identity)
        identity = self.bn2(identity)
        identity = self.relu(identity)
        identity = self.cov3(identity)
        x = identity + x  # residual connection: add the unmodified input back
        x = self.relu(x)
        return x
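As a quick sanity check (a minimal sketch of my own; the 256-channel, 56x56 shape is borrowed from the layer1 configuration used further below), the identity block must preserve the input shape, since its output is added element-wise to the unmodified input:

# sanity check: IdentityBlock_V2 preserves the input shape
block = IdentityBlock_V2(in_channel=256, kl_size=3, filters=[64, 64, 256])
x = torch.randn(1, 256, 56, 56)  # one 256-channel 56x56 feature map
print(block(x).shape)            # torch.Size([1, 256, 56, 56])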
class ConvBlock_V2(nn.Module):
    def __init__(self, in_channel, kl_size, filters, stride_size=2, conv_shortcut=False):
        super(ConvBlock_V2, self).__init__()
        filter1, filter2, filter3 = filters
        self.bn0 = nn.BatchNorm2d(num_features=in_channel)
        self.cov1 = nn.Conv2d(in_channels=in_channel, out_channels=filter1, kernel_size=1, stride=stride_size, padding=0)
        self.bn1 = nn.BatchNorm2d(num_features=filter1)
        self.relu = nn.ReLU(inplace=True)
        self.zeropadding2d1 = nn.ZeroPad2d(1)
        self.cov2 = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=kl_size, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(num_features=filter2)
        self.cov3 = nn.Conv2d(in_channels=filter2, out_channels=filter3, kernel_size=1, stride=1, padding=0)
        self.conv_shortcut = conv_shortcut
        if self.conv_shortcut:
            # projection shortcut with a 1x1 convolution, as in ResNet50
            self.short_cut = nn.Conv2d(in_channels=in_channel, out_channels=filter3, kernel_size=1, stride=stride_size, padding=0)
        else:
            # parameter-free shortcut: a 1x1 max-pool only subsamples, so this
            # branch requires in_channel == filter3 for the addition to work
            self.short_cut = nn.MaxPool2d(kernel_size=1, stride=stride_size, padding=0)

    def forward(self, x):
        identity = self.bn0(x)
        identity = self.relu(identity)
        short_cut = self.short_cut(identity)  # shortcut branches off after BN + ReLU
        identity = self.cov1(identity)
        identity = self.bn1(identity)
        identity = self.relu(identity)
        identity = self.zeropadding2d1(identity)
        identity = self.cov2(identity)
        identity = self.bn2(identity)
        identity = self.relu(identity)
        identity = self.cov3(identity)
        x = identity + short_cut
        x = self.relu(x)
        return x
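The two shortcut modes can be checked the same way (again a minimal sketch, with shapes borrowed from layer1 below):

# conv_shortcut=True: a 1x1 projection conv changes the channel count
down = ConvBlock_V2(64, 3, [64, 64, 256], stride_size=1, conv_shortcut=True)
print(down(torch.randn(1, 64, 56, 56)).shape)   # torch.Size([1, 256, 56, 56])

# conv_shortcut=False: the max-pool shortcut keeps the channel count, so
# this mode requires in_channel == filter3 (here 256 == 256)
pool = ConvBlock_V2(256, 3, [64, 64, 256], stride_size=2, conv_shortcut=False)
print(pool(torch.randn(1, 256, 56, 56)).shape)  # torch.Size([1, 256, 28, 28])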
The concrete difference is the branch added in ConvBlock_V2: if conv_shortcut=True, it uses nn.Conv2d exactly as in ResNet50; otherwise it uses nn.MaxPool2d:

        if self.conv_shortcut:
            self.short_cut = nn.Conv2d(in_channels=in_channel, out_channels=filter3, kernel_size=1, stride=stride_size, padding=0)
        else:
            self.short_cut = nn.MaxPool2d(kernel_size=1, stride=stride_size, padding=0)
Note that the short_cut branch is taken only after the input has passed through the pre-activation BN and ReLU once:

        identity = self.bn0(x)
        identity = self.relu(identity)
        short_cut = self.short_cut(identity)
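A max-pool with kernel_size=1 may look odd: pooling over a single element just returns that element, so the layer is nothing more than parameter-free strided subsampling. A tiny demonstration (my own sketch):

pool = nn.MaxPool2d(kernel_size=1, stride=2)
x = torch.arange(16.0).reshape(1, 1, 4, 4)  # values 0..15 laid out row by row
print(pool(x))  # picks every second row and column:
# tensor([[[[ 0.,  2.],
#           [ 8., 10.]]]])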
2. Implementing the ResNet_V2 Model
Having covered the differences between ResNet50 and ResNet_V2 above, the complete ResNet_V2 code and parameters are as follows:
class Resnet50_Model_V2(nn.Module):
    def __init__(self):
        super(Resnet50_Model_V2, self).__init__()
        self.in_channels = 3
        # ============= stem (input) layers
        self.zeropadding2d_0 = nn.ZeroPad2d(3)
        self.cov0 = nn.Conv2d(self.in_channels, out_channels=64, kernel_size=7, stride=2)
        self.zeropadding2d_1 = nn.ZeroPad2d(1)
        self.maxpool0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.layer1 = nn.Sequential(
            ConvBlock_V2(64, 3, [64, 64, 256], 1, 1),
            IdentityBlock_V2(256, 3, [64, 64, 256]),
            ConvBlock_V2(256, 3, [64, 64, 256], 2, 0),
        )
        self.layer2 = nn.Sequential(
            ConvBlock_V2(256, 3, [128, 128, 512], 1, 1),
            IdentityBlock_V2(512, 3, [128, 128, 512]),
            IdentityBlock_V2(512, 3, [128, 128, 512]),
            ConvBlock_V2(512, 3, [128, 128, 512], 2, 0),
        )
        self.layer3 = nn.Sequential(
            ConvBlock_V2(512, 3, [256, 256, 1024], 1, 1),
            IdentityBlock_V2(1024, 3, [256, 256, 1024]),
            IdentityBlock_V2(1024, 3, [256, 256, 1024]),
            IdentityBlock_V2(1024, 3, [256, 256, 1024]),
            IdentityBlock_V2(1024, 3, [256, 256, 1024]),
            ConvBlock_V2(1024, 3, [256, 256, 1024], 2, 0),
        )
        self.layer4 = nn.Sequential(
            ConvBlock_V2(1024, 3, [512, 512, 2048], 1, 1),
            IdentityBlock_V2(2048, 3, [512, 512, 2048]),
            IdentityBlock_V2(2048, 3, [512, 512, 2048]),
        )
        # output head: V2 needs a final BN + ReLU because the blocks end with a bare conv
        self.bn = nn.BatchNorm2d(num_features=2048)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d((7, 7))
        # classification layer: 2048 features remain after the 7x7 average pooling.
        # nn.CrossEntropyLoss (used below) applies log-softmax internally, so the
        # layer outputs raw logits; N_classes is the dataset's class count.
        self.fc = nn.Linear(2048, N_classes)

    def basic_layer1(self, x):
        '''
        stem: ZeroPad(3) -> 7x7/2 conv -> ZeroPad(1) -> 3x3/2 max-pool
        input:          (-1, 3, 224, 224)
        after conv:     (-1, 64, 112, 112)
        after max-pool: (-1, 64, 56, 56)
        '''
        x = self.zeropadding2d_0(x)
        x = self.cov0(x)
        x = self.zeropadding2d_1(x)
        x = self.maxpool0(x)
        return x

    def forward(self, x):
        x = self.basic_layer1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
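Before training, it is worth running a quick smoke test (my own addition; N_classes must be set first, and 4 is only an example value) to confirm the stages wire up and that a 224x224 input yields one logit per class:

N_classes = 4  # example value; set this to your dataset's class count
model = Resnet50_Model_V2()
out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # expected: torch.Size([1, 4])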
3. Comparing the Training Results of ResNet50 and ResNet_V2
import time
import numpy as np
import matplotlib.pyplot as plt
import torchsummary as summary

# train_data / test_data are the DataLoaders, and train_data_size /
# test_data_size the dataset sizes, prepared as in last week's article
def train_and_test(model, loss_func, optimizer, epochs=25):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    summary.summary(model, (3, 224, 224))
    record = []
    best_acc = 0.0
    best_epoch = 0
    for epoch in range(epochs):  # train for `epochs` rounds
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch + 1, epochs))
        model.train()  # training mode
        train_loss = 0.0
        train_acc = 0.0
        valid_loss = 0.0
        valid_acc = 0.0
        for i, (inputs, labels) in enumerate(train_data):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()  # remember to zero the gradients
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
            if i % 10 == 0:
                print("train data: {:01d} / {:03d} outputs: {}".format(i, len(train_data), outputs.data[0]))
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            train_acc += acc.item() * inputs.size(0)
        with torch.no_grad():
            model.eval()  # evaluation mode
            for j, (inputs, labels) in enumerate(test_data):
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = loss_func(outputs, labels)
                valid_loss += loss.item() * inputs.size(0)
                if j % 10 == 0:
                    print("val data: {:01d} / {:03d} outputs: {}".format(j, len(test_data), outputs.data[0]))
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))
                acc = torch.mean(correct_counts.type(torch.FloatTensor))
                valid_acc += acc.item() * inputs.size(0)
        avg_train_loss = train_loss / train_data_size
        avg_train_acc = train_acc / train_data_size
        avg_valid_loss = valid_loss / test_data_size
        avg_valid_acc = valid_acc / test_data_size
        record.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
        if avg_valid_acc > best_acc:  # track the best validation accuracy
            best_acc = avg_valid_acc
            best_epoch = epoch + 1
        epoch_end = time.time()
        print("Epoch: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation: Loss: {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(
            epoch + 1, avg_train_loss, avg_train_acc * 100, avg_valid_loss, avg_valid_acc * 100,
            epoch_end - epoch_start))
    print("Best Accuracy for validation : {:.4f} at epoch {:03d}".format(best_acc, best_epoch))
    return model, record
#%%
if __name__ == '__main__':
    epochs = 100
    model = Resnet50_Model_V2()
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    model, record = train_and_test(model, loss_func, optimizer, epochs)
    torch.save(model, './Best_Resnet50_V2.pth')

    record = np.array(record)
    plt.plot(record[:, 0:2])
    plt.legend(['Train Loss', 'Valid Loss'])
    plt.xlabel('Epoch Number')
    plt.ylabel('Loss')
    plt.ylim(0, 1.5)
    plt.savefig('Loss_V2.png')
    plt.show()

    plt.plot(record[:, 2:4])
    plt.legend(['Train Accuracy', 'Valid Accuracy'])
    plt.xlabel('Epoch Number')
    plt.ylabel('Accuracy')
    plt.ylim(0, 1)
    plt.savefig('Accuracy_V2.png')
    plt.show()
Training the model saves the ResNet_V2 curves as Loss_V2.png and Accuracy_V2.png.
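Since torch.save above stores the entire module rather than just a state_dict, reloading it for inference is straightforward (a minimal sketch; the block and model class definitions must be importable when loading):

model = torch.load('./Best_Resnet50_V2.pth', map_location='cpu')
model.eval()  # switch BatchNorm layers to inference mode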
The comparison is as follows:
The ResNet50 model only reaches a reasonably good result after about 40 epochs.
The ResNet50_V2 model already reaches a good result by about epoch 30, so V2 is a solid improvement.