>- **🍨 本文为[🔗365天深度学习训练营](https://mp.weixin.qq.com/s/Z9yL_wt7L8aPOr9Lqb1K3w) 中的学习记录博客**
>- **🍖 原作者:[K同学啊](https://mtyjkh.blog.csdn.net/)**
本周任务:
●1.请根据本文 TensorFlow 代码,编写出相应的 Pytorch 代码
●2.了解残差结构
●3.是否可以将残差模块融入到C3当中(自由探索)
一、设置GPU
二、导入数据
共有四个类别,图片总数为565
三、数据处理
对数据集进行预处理:首先将图片尺寸统一调整为 224*224 像素,再做随机翻转作为数据增强以提升泛化能力;接着转换为 PyTorch 的张量(tensor)格式,并将像素值归一化到 [0,1] 之间,然后做标准化处理;最终创建一个 ImageFolder 数据集对象,并照例查看每个类别的索引。
运行结果:
四、构建模型
class Identity_block(nn.Module):
    """ResNet identity (bottleneck) block: 1x1 -> kxk -> 1x1 convolutions
    plus a residual connection that adds the unmodified input.

    The input must already have ``filters[2]`` channels and the spatial
    size is preserved, so the shortcut needs no projection.

    Args:
        in_channels: channels of the incoming feature map.
        filters: (filters1, filters2, filters3) bottleneck widths.
        kernel_size: size of the middle convolution (odd, e.g. 3).
        stage, block: naming identifiers kept for interface compatibility
            with the original TensorFlow code; they do not affect compute.
    """

    def __init__(self, in_channels, filters, kernel_size, stage, block):
        super(Identity_block, self).__init__()
        filters1, filters2, filters3 = filters
        self.conv1 = nn.Conv2d(in_channels, filters1, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(filters1)
        # padding = kernel_size // 2 keeps the spatial size unchanged for any
        # odd kernel size (the original hard-coded padding=1, which is only
        # correct for kernel_size=3).
        self.conv2 = nn.Conv2d(filters1, filters2, kernel_size=kernel_size, stride=1,
                               padding=kernel_size // 2, bias=False)
        self.bn2 = nn.BatchNorm2d(filters2)
        self.conv3 = nn.Conv2d(filters2, filters3, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(filters3)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = x  # saved for the residual addition
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))  # no ReLU before the addition
        out += identity
        out = self.relu(out)
        return out
class Conv_block(nn.Module):
    """ResNet convolutional (bottleneck) block: 1x1 -> kxk -> 1x1 with a
    projected shortcut (1x1 conv + BN), so it can change both the channel
    count and (via ``strides``) the spatial resolution.

    Args:
        in_channels: channels of the incoming feature map.
        filters: (filters1, filters2, filters3) bottleneck widths; the
            block outputs ``filters3`` channels.
        kernel_size: size of the middle convolution (odd, e.g. 3).
        stage, block: naming identifiers kept for interface compatibility
            with the original TensorFlow code; they do not affect compute.
        strides: stride of the first conv and of the shortcut projection.
    """

    def __init__(self, in_channels, filters, kernel_size, stage, block, strides=2):
        super(Conv_block, self).__init__()
        filters1, filters2, filters3 = filters
        self.conv1 = nn.Conv2d(in_channels, filters1, kernel_size=1, stride=strides, bias=False)
        self.bn1 = nn.BatchNorm2d(filters1)
        # padding = kernel_size // 2 keeps the spatial size unchanged for any
        # odd kernel size (the original hard-coded padding=1, which is only
        # correct for kernel_size=3).
        self.conv2 = nn.Conv2d(filters1, filters2, kernel_size=kernel_size, stride=1,
                               padding=kernel_size // 2, bias=False)
        self.bn2 = nn.BatchNorm2d(filters2)
        self.conv3 = nn.Conv2d(filters2, filters3, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(filters3)
        # Projection shortcut: matches the main path's output channels and
        # stride so the residual addition is shape-compatible.
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_channels, filters3, kernel_size=1, stride=strides, bias=False),
            nn.BatchNorm2d(filters3)
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))  # no ReLU before the addition
        out += identity
        out = self.relu(out)
        return out
class Resnet50(nn.Module):
    """ResNet-50 assembled from Conv_block / Identity_block bottleneck units.

    Layout mirrors the reference TensorFlow implementation:
    stem (7x7/2 conv + 3x3/2 max-pool) -> four stages of [3, 4, 6, 3]
    bottleneck blocks -> global average pool -> fully-connected classifier.

    Args:
        input_shape: (channels, height, width) of the input images; only
            the channel count is used (to size the stem convolution).
        classes: number of output classes.
    """

    def __init__(self, input_shape=(3, 224, 224), classes=1000):
        super(Resnet50, self).__init__()
        self.in_channels = 64
        # Stem
        self.conv1 = nn.Conv2d(input_shape[0], self.in_channels, kernel_size=7, padding=3, stride=2, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stage 2 keeps the spatial size (stride=1); later stages halve it.
        self.layer1 = self._make_layer([64, 64, 256], blocks=3, stage=2, stride=1)
        self.layer2 = self._make_layer([128, 128, 512], blocks=4, stage=3, stride=2)
        self.layer3 = self._make_layer([256, 256, 1024], blocks=6, stage=4, stride=2)
        self.layer4 = self._make_layer([512, 512, 2048], blocks=3, stage=5, stride=2)
        # Adaptive pooling generalises the original AvgPool2d((7, 7)), which
        # only worked for 224x224 inputs; for a 224 input the result is
        # identical (7x7 feature map -> 1x1).
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, classes)

    def _make_layer(self, filters, blocks, stage, stride):
        """Build one stage: a Conv_block (projection shortcut, possibly
        strided) followed by ``blocks - 1`` Identity_blocks."""
        layers = [Conv_block(self.in_channels, filters, kernel_size=3, stage=stage, block='a', strides=stride)]
        self.in_channels = filters[2]  # stage output width feeds the identity blocks
        for b in range(1, blocks):
            # identity blocks are named 'b', 'c', ... after the leading 'a'
            layers.append(Identity_block(self.in_channels, filters, kernel_size=3, stage=stage, block=chr(97 + b)))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)      # global average pool -> (N, 2048, 1, 1)
        x = torch.flatten(x, 1)  # -> (N, 2048)
        x = self.fc(x)
        return x
# Instantiate the network with its default settings and print the layer
# structure as a quick sanity check.
model = Resnet50()
print(model)
五、训练模型
# Hyper-parameters: Adam optimiser with a fixed learning rate, and
# cross-entropy loss for multi-class classification.
learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Training and evaluation functions
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: yields (inputs, labels) batches; must wrap a sized Dataset.
        model: network to optimise (caller is expected to call model.train()).
        loss_fn: criterion, e.g. nn.CrossEntropyLoss.
        optimizer: optimiser over model.parameters().

    Returns:
        (train_acc, train_loss): accuracy over all samples and mean loss
        over batches for this epoch.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    # Derive the device from the model instead of relying on a global
    # `device` variable, so the function works wherever the model lives.
    device = next(model.parameters()).device
    train_loss, train_acc = 0.0, 0.0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate accuracy (correct-count) and loss
        train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss += loss.item()
    train_acc /= size          # fraction of correctly classified samples
    train_loss /= num_batches  # mean per-batch loss
    return train_acc, train_loss
# 设置测试函数
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
test_loss, test_acc = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
test_pred = model(X)
loss = loss_fn(test_pred, y)
test_loss += loss.item()
test_acc += (test_pred.argmax(1) == y).type(torch.float).sum().item()
test_acc /= size
test_loss /= num_batches
return test_acc, test_loss
# NOTE(review): the optimizer was created before this .to(device) call; this
# works because nn.Module.to() moves parameters in place, but building the
# optimizer after moving the model is the safer convention — confirm.
model = model.to(device)
epochs = 10
# Per-epoch metric history, presumably used for plotting in the
# evaluation section (section six) — outside this view.
train_loss = []
train_acc = []
test_loss = []
test_acc = []
# Training and evaluation loop
for epoch in range(epochs):
    model.train()  # enable training-mode behaviour (batch-norm batch stats)
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
    model.eval()   # switch batch-norm to running statistics for evaluation
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)
    # Fetch the current learning rate (constant here, but useful if a
    # scheduler is added later)
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    print(f'Epoch: {epoch + 1:2d}, Train_acc: {epoch_train_acc * 100:.1f}%, Train_loss: {epoch_train_loss:.3f}, '
          f'Test_acc: {epoch_test_acc * 100:.1f}%, Test_loss: {epoch_test_loss:.3f}, lr: {lr:.2E}')
六、模型评估
总结:
ResNet-50 是一种深度卷积神经网络,采用了残差学习框架来提高网络的训练效率和性能。它由 50 层构成,通过引入残差块(Residual Block)来解决深层网络训练中的梯度消失和爆炸问题。每个残差块通过快捷连接(shortcut connection)将输入直接加到输出上,使得网络更容易学习恒等映射,并且显著加快了收敛速度。
可以加入C3模块进一步增强 ResNet-50 模型的特征提取能力。C3 模块是一个结合了卷积、池化和残差连接的高级模块,可以有效地捕捉更多的上下文信息和复杂特征,从而提升模型的性能。后续会对模型做进一步改进。