一、摘要:
1.训练了一个大型深度卷积神经网络来将ImageNet LSVRC-2010竞赛的120万高分辨率的图像分到1000不同的类别中。在测试数据上,得到了top-1 37.5%, top-5 17.0%的错误率,这个结果比目前的最好结果好很多。
2.这个神经网络有6000万参数和650000个神经元,包含5个卷积层(某些卷积层后面带有池化层)和3个全连接层,最后是一个1000维的softmax。
3.为了训练的更快,我们使用了非饱和神经元并对卷积操作进行了非常有效的GPU实现。
4.为了减少全连接层的过拟合,采用了dropout的正则化方法
二、架构使用的方法:
ReLU非线性
多GPU训练
重叠池化
数据增强
1.从256×256图像上通过随机提取224 × 224的图像块
2.改变训练对象RGB通道的强度
失活(Dropout)
以0.5的概率对每个隐层神经元的输出设为0。
三、定性评估:
显示了网络的两个数据连接层(分别位于两块GPU上)学习到的卷积核。网络学习到了大量具有频率选择性和方向选择性的卷积核,也学到了各种颜色的斑点核。
四、复现实验:
if __name__ == "__main__":
    # Fine-tune a pretrained AlexNet on a 2-class (cat/dog) dataset.
    # NOTE(review): depends on module-level names defined elsewhere in the file:
    # BASE_DIR, device, CatDogDataset, get_model, and the torch/torchvision imports.

    # ---------------------------------- config ----------------------------------
    data_dir = os.path.join(BASE_DIR, "..", "data", "train")
    path_state_dict = os.path.join(BASE_DIR, "..", "data", "alexnet-owt-4df8aa71.pth")

    num_classes = 2        # cat vs dog
    MAX_EPOCH = 3          # 可自行修改
    BATCH_SIZE = 128       # 可自行修改
    LR = 0.001             # 可自行修改
    log_interval = 1       # print training stats every N iterations
    val_interval = 1       # run validation every N epochs
    start_epoch = -1
    lr_decay_step = 1      # StepLR: decay LR every N epochs

    # ============================ step 1/5 数据 ============================
    # ImageNet channel statistics (the pretrained weights expect these).
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        # FIX: was Resize((256)) — the extra parens are misleading; (256) is just
        # the int 256, i.e. "resize shortest side to 256, keep aspect ratio".
        # Resize((256, 256)) would instead force a square (see valid_transform).
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.RandomCrop(224),             # AlexNet-style random 224×224 patch
        transforms.RandomHorizontalFlip(p=0.5), # paper's horizontal-reflection augmentation
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    normalizes = transforms.Normalize(norm_mean, norm_std)
    valid_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        # Ten-crop test-time augmentation (4 corners + center, plus mirrors),
        # as in the AlexNet paper. Output per sample: (10, C, 224, 224).
        transforms.TenCrop(224, vertical_flip=False),
        transforms.Lambda(
            lambda crops: torch.stack([normalizes(transforms.ToTensor()(crop)) for crop in crops])),
    ])

    # 构建MyDataset实例
    train_data = CatDogDataset(data_dir=data_dir, mode="train", transform=train_transform)
    valid_data = CatDogDataset(data_dir=data_dir, mode="valid", transform=valid_transform)

    # 构建DataLoader
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=4)  # small: each sample is 10 crops

    # ============================ step 2/5 模型 ============================
    alexnet_model = get_model(path_state_dict, False)
    # Replace the final fully-connected layer (classifier[6]) so the head
    # outputs `num_classes` logits instead of ImageNet's 1000.
    num_ftrs = alexnet_model.classifier._modules["6"].in_features
    alexnet_model.classifier._modules["6"] = nn.Linear(num_ftrs, num_classes)
    alexnet_model.to(device)

    # ============================ step 3/5 损失函数 ============================
    criterion = nn.CrossEntropyLoss()

    # ============================ step 4/5 优化器 ============================
    # NOTE: this branch does not actually freeze the conv layers — it trains
    # them with a 10× smaller learning rate than the classifier head.
    use_small_lr_for_conv = False  # set True to fine-tune conv layers at LR * 0.1
    if use_small_lr_for_conv:
        fc_params_id = list(map(id, alexnet_model.classifier.parameters()))  # ids of head params
        base_params = filter(lambda p: id(p) not in fc_params_id, alexnet_model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * 0.1},                          # conv backbone
            {'params': alexnet_model.classifier.parameters(), 'lr': LR}],     # new head
            momentum=0.9)
    else:
        optimizer = optim.SGD(alexnet_model.parameters(), lr=LR, momentum=0.9)

    # 设置学习率下降策略: multiply LR by 0.1 every `lr_decay_step` epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_decay_step, gamma=0.1)

    # ============================ step 5/5 训练 ============================
    train_curve = list()   # per-iteration training loss
    valid_curve = list()   # per-validation mean loss

    for epoch in range(start_epoch + 1, MAX_EPOCH):

        loss_mean = 0.
        correct = 0.
        total = 0.

        alexnet_model.train()
        for i, data in enumerate(train_loader):
            # forward
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = alexnet_model(inputs)

            # backward
            optimizer.zero_grad()
            loss = criterion(outputs, labels)
            loss.backward()

            # update weights
            optimizer.step()

            # 统计分类情况
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).squeeze().cpu().sum().numpy()

            # 打印训练信息
            loss_mean += loss.item()
            train_curve.append(loss.item())
            if (i + 1) % log_interval == 0:
                loss_mean = loss_mean / log_interval
                print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                    epoch, MAX_EPOCH, i + 1, len(train_loader), loss_mean, correct / total))
                loss_mean = 0.

        scheduler.step()  # 更新学习率 (per epoch)

        # FIX: validation loop was missing entirely — `val_interval`,
        # `valid_loader`, and `valid_curve` were defined but never used.
        # Averages logits over the 10 TenCrop views per image.
        if (epoch + 1) % val_interval == 0:

            correct_val = 0.
            total_val = 0.
            loss_val = 0.

            alexnet_model.eval()
            with torch.no_grad():
                for j, data in enumerate(valid_loader):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)

                    # inputs: (batch, ncrops, C, H, W) -> flatten crops into batch dim
                    bs, ncrops, c, h, w = inputs.size()
                    outputs = alexnet_model(inputs.view(-1, c, h, w))
                    outputs_avg = outputs.view(bs, ncrops, -1).mean(1)  # mean over crops

                    loss = criterion(outputs_avg, labels)

                    _, predicted = torch.max(outputs_avg.data, 1)
                    total_val += labels.size(0)
                    correct_val += (predicted == labels).squeeze().cpu().sum().numpy()
                    loss_val += loss.item()

                loss_val_mean = loss_val / len(valid_loader)
                valid_curve.append(loss_val_mean)
                print("Valid:   Epoch[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                    epoch, MAX_EPOCH, loss_val_mean, correct_val / total_val))
五、训练模型:
六、记录问题:
代码里添加路径(注意 Windows 盘符后要加斜杠,否则 "F:论文文档" 会被解释为 F 盘当前目录下的相对路径):
import sys
sys.path.append("F:/论文文档/lesson/A_alexnet")