目录
仅为个人笔记;代码与学习资源来自 B 站(哔哩哔哩)博主 "霹雳吧啦Wz" 的个人空间。
一、网络原理
1. LRN 局部响应归一化(详见超链接)
2.经卷积后的矩阵尺寸大小计算公式为: N = (W − F + 2P) / S + 1,其中 W 为输入尺寸,F 为卷积核尺寸,P 为填充(padding),S 为步长(stride)。
3.网络结构
二、复现细节
如下步骤:
1 配置GPU/CPU
# Use the first GPU if CUDA is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
2 数据
- 定义数据变换——使用torchvision自带的transform,常用模块(详见超链接)
# Preprocessing / augmentation pipelines, keyed by dataset split.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop, resized to 224x224
                                 transforms.RandomHorizontalFlip(),  # random left-right flip (augmentation)
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    "val": transforms.Compose([transforms.Resize((224, 224)),  # must be the tuple (224, 224); a bare 224 would only resize the shorter edge
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
- 读取数据——(训练和验证分开,同一种花放在同一个文件夹)
01 构建dataset,使用torchvision自带的datasets.ImageFolder(详见超链接)
# Datasets: ImageFolder expects one sub-folder per class under
# <image_path>/train and <image_path>/val (train/val kept separate).
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
# DataLoaders: shuffle only the training split.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=32, shuffle=True,
                                           num_workers=0)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=4, shuffle=False,
                                              num_workers=0)
3 定义网络、超参数
net = AlexNet(num_classes=5, init_weights=True)  # hand-built model (or use the torchvision-provided one)
net.to(device)
loss_function = nn.CrossEntropyLoss()  # expects raw logits; applies log-softmax internally
optimizer = optim.Adam(net.parameters(), lr=0.0002)
epochs = 10
save_path = './AlexNet.pth'
best_acc = 0.0  # best validation accuracy seen so far; drives checkpoint saving
4 训练套路
for epoch in range(epochs):
    # ---- 4.1 train phase ----
    net.train()  # enable training-mode behavior (e.g. dropout) before training
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)  # progress bar over batches
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,epochs,loss)
        # the bar shows the loss of the current batch, not a running average
    # ---- 4.2 validate phase ----
    net.eval()  # enable eval-mode behavior before validating
    acc = 0.0  # accumulated count of correct predictions over the whole epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]  # index of each row's max logit = predicted class
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()  # correct predictions in this batch
    val_accurate = acc / val_num  # epoch accuracy; val_num presumably = len(validate_dataset) — defined elsewhere, TODO confirm
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))  # train_steps presumably = len(train_loader) — defined elsewhere
    # ---- 4.3 save the best checkpoint ----
    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)
5 其它细节
- 网络搭建----padding、特征图计算
- 类别json文件生成
利用 datasets.ImageFolder 生成的 train_dataset,可直接得到"类别名 → 索引"的映射,再反转后写入 json 供预测时解码:
# Class-name -> index mapping assigned by datasets.ImageFolder,
# e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}.
flower_list = train_dataset.class_to_idx
# Invert to index -> class-name (dict comprehension instead of dict(generator))
# so predictions can be decoded back to human-readable labels.
cla_dict = {idx: name for name, idx in flower_list.items()}
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)
- 损失计算-----nn.CrossEntropyLoss(详见超链接)
- 网络初始化:
class AlexNet(nn.Module):
    """AlexNet skeleton: convolutional feature extractor + fully-connected
    classifier. Layer bodies are elided (.....) in these notes."""
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor (layers elided in the notes).
        self.features = nn.Sequential(
            .....
        )
        # Fully-connected classification head (layers elided in the notes).
        self.classifier = nn.Sequential(
            .....
        )
        if init_weights:
            self._initialize_weights()
    def forward(self, x):
        x = self.features(x)  # x has shape B C H W: batch, channels, height, width
        x = torch.flatten(x, start_dim=1)  # flatten from dim 1 (C) onward -> shape B, C*H*W
        x = self.classifier(x)
        return x
    def _initialize_weights(self):
        for m in self.modules():  # walk every sub-module of the network
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)