reference:
GoogLeNet网络结构详解与模型的搭建 (太阳花的小绿豆, CSDN blog)
5.2 使用pytorch搭建GoogLeNet网络 (Bilibili)
GoogLeNet was proposed by a team at Google in 2014 (the capital L in GoogLeNet is a tribute to LeNet). The original paper is "Going deeper with convolutions".
Below are the parameter table and the network architecture diagram given in the original paper.
In the parameter table, the Inception columns are annotated per branch of the module: #1x1 is the number of 1x1 convolution kernels on branch 1; #3x3reduce is the number of 1x1 kernels on branch 2, and #3x3 the number of 3x3 kernels that follow them; #5x5reduce is the number of 1x1 kernels on branch 3, and #5x5 the number of 5x5 kernels that follow them; poolproj is the number of 1x1 kernels (after max pooling) on branch 4.
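Since the figures do not reproduce here, the Inception parameters can be reconstructed from the constructor calls in the model code below (they match Table 1 of the paper):

layer        #1x1  #3x3reduce  #3x3  #5x5reduce  #5x5  poolproj  output channels
inception3a    64      96       128      16       32      32         256
inception3b   128     128       192      32       96      64         480
inception4a   192      96       208      16       48      64         512
inception4b   160     112       224      24       64      64         512
inception4c   128     128       256      24       64      64         512
inception4d   112     144       288      32       64      64         528
inception4e   256     160       320      32      128     128         832
inception5a   256     160       320      32      128     128         832
inception5b   384     192       384      48      128     128        1024

Because the four branches are concatenated along the channel dimension, the output channel count is #1x1 + #3x3 + #5x5 + poolproj; for Inception(3a), that is 64 + 128 + 32 + 32 = 256.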
Below is the structure of the auxiliary classifier; the two auxiliary classifiers in the network (Aux_logits) are identical, as shown in the figure below.
The inputs of the two auxiliary classifiers come from Inception(4a) and Inception(4d) respectively.
Layer 1 is an average-pooling downsampling layer with a 5x5 kernel and stride=3.
Layer 2 is a convolutional layer with 128 kernels of size 1x1, stride=1.
Layer 3 is a fully connected layer with 1024 nodes.
Layer 4 is a fully connected layer with 1000 nodes (matching the number of classes).
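A quick size check: both auxiliary inputs are 14x14 spatially, so the 5x5, stride-3 average pooling gives (14 - 5)/3 + 1 = 4, i.e. a 4x4 feature map; after the 128-kernel 1x1 convolution, flattening yields 128 * 4 * 4 = 2048 values, which is exactly the input dimension of the first fully connected layer in the code below.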
model:
import torch.nn as nn
import torch
import torch.nn.functional as F
# input: 3 x 224 x 224; PyTorch conv/pool defaults: stride=1, padding=0
class GoogLeNet(nn.Module):
    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):  # aux_logits: whether to use the auxiliary classifiers
super(GoogLeNet, self).__init__()
self.aux_logits = aux_logits
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)  # (224+2*3-(7-1)-1)/2+1=112.5, floored to 112; output: 112 * 112 * 64
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # ceil_mode=True rounds the output size up, ceil_mode=False rounds down
        # (112+2*0-(3-1)-1)/2+1=55.5, ceiled to 56; output: 56 * 56 * 64
self.conv2 = BasicConv2d(64, 64, kernel_size=1) # output:56 * 56 * 64
self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1) # output:56 * 56 * 192
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # output: 28 * 28 * 192
self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32) # output:28 * 28 * 256
self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64) # output:28 * 28 * 480
self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) # output:14 * 14 * 480
self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64) # output:14 * 14 * 512
self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64) # output:14 * 14 * 512
self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64) # output:14 * 14 * 512
self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64) # output:14 * 14 * 528
self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128) # output:14 * 14 * 832
self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) # output:7 * 7 * 832
self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128) # output:7 * 7 * 832
self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128) # output:7 * 7 * 1024
if self.aux_logits:
self.aux1 = InceptionAux(512, num_classes)
self.aux2 = InceptionAux(528, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling: yields a 1x1 feature map regardless of the input spatial size
self.dropout = nn.Dropout(0.4)
self.fc = nn.Linear(1024, num_classes)
if init_weights:
self._initialize_weights()
    def forward(self, x):  # forward pass
# N x 3 x 224 x 224
x = self.conv1(x)
# N x 64 x 112 x 112
x = self.maxpool1(x)
# N x 64 x 56 x 56
x = self.conv2(x)
# N x 64 x 56 x 56
x = self.conv3(x)
# N x 192 x 56 x 56
x = self.maxpool2(x)
# N x 192 x 28 x 28
x = self.inception3a(x)
# N x 256 x 28 x 28
x = self.inception3b(x)
# N x 480 x 28 x 28
x = self.maxpool3(x)
# N x 480 x 14 x 14
x = self.inception4a(x)
# N x 512 x 14 x 14
        if self.training and self.aux_logits:  # the auxiliary classifiers run only in training mode
            aux1 = self.aux1(x)
x = self.inception4b(x)
# N x 512 x 14 x 14
x = self.inception4c(x)
# N x 512 x 14 x 14
x = self.inception4d(x)
# N x 528 x 14 x 14
        if self.training and self.aux_logits:  # the auxiliary classifiers run only in training mode
            aux2 = self.aux2(x)
x = self.inception4e(x)
# N x 832 x 14 x 14
x = self.maxpool4(x)
# N x 832 x 7 x 7
x = self.inception5a(x)
# N x 832 x 7 x 7
x = self.inception5b(x)
# N x 1024 x 7 x 7
x = self.avgpool(x)
# N x 1024 x 1 x 1
x = torch.flatten(x, 1)
# N x 1024
x = self.dropout(x)
x = self.fc(x)
# N x 1000 (num_classes)
        if self.training and self.aux_logits:  # the auxiliary classifiers run only in training mode
            return x, aux2, aux1  # training mode returns three outputs; otherwise only the main output is returned
return x
    def _initialize_weights(self):  # initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
class Inception(nn.Module):  # the Inception module
def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
super(Inception, self).__init__()
self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
self.branch2 = nn.Sequential(
BasicConv2d(in_channels, ch3x3red, kernel_size=1),
            BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)  # padding keeps the output size equal to the input size
)
self.branch3 = nn.Sequential(
BasicConv2d(in_channels, ch5x5red, kernel_size=1),
            # The official torchvision implementation actually uses a 3x3 kernel here, not 5x5 (a known discrepancy kept for backward compatibility);
            # please see https://github.com/pytorch/vision/issues/906 for details. The 5x5 kernel from the paper is used here.
            BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)  # padding keeps the output size equal to the input size
)
self.branch4 = nn.Sequential(
nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
BasicConv2d(in_channels, pool_proj, kernel_size=1)
)
    def forward(self, x):  # forward pass
branch1 = self.branch1(x)
branch2 = self.branch2(x)
branch3 = self.branch3(x)
branch4 = self.branch4(x)
outputs = [branch1, branch2, branch3, branch4]
        return torch.cat(outputs, 1)  # concatenate the four branches along the channel dimension
class InceptionAux(nn.Module):  # auxiliary classifier
def __init__(self, in_channels, num_classes):
super(InceptionAux, self).__init__()
self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
self.conv = BasicConv2d(in_channels, 128, kernel_size=1) # output[batch, 128, 4, 4]
self.fc1 = nn.Linear(2048, 1024) # 128 * 4 * 4 = 2048
self.fc2 = nn.Linear(1024, num_classes)
    def forward(self, x):  # forward pass
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14 (input shapes of auxiliary classifiers 1 and 2)
x = self.averagePool(x)
# aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
x = self.conv(x)
# N x 128 x 4 x 4
        x = torch.flatten(x, 1)  # flatten the feature maps
        x = F.dropout(x, 0.5, training=self.training)  # self.training is True under model.train() and False under model.eval()
# N x 2048
x = F.relu(self.fc1(x), inplace=True)
x = F.dropout(x, 0.5, training=self.training)
# N x 1024
x = self.fc2(x)
# N x num_classes
return x
class BasicConv2d(nn.Module):  # a basic Conv2d + ReLU block, subclassing nn.Module
def __init__(self, in_channels, out_channels, **kwargs):
super(BasicConv2d, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
self.relu = nn.ReLU(inplace=True)
    def forward(self, x):  # forward pass
x = self.conv(x)
x = self.relu(x)
return x
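
As a quick sanity check (not part of the original scripts), the short sketch below pushes a random batch through the model and prints the output shapes; it assumes the class above is saved as model.py:

import torch
from model import GoogLeNet

net = GoogLeNet(num_classes=1000, aux_logits=True, init_weights=True)
x = torch.randn(2, 3, 224, 224)  # a dummy batch of two 224x224 RGB images

net.train()  # training mode: forward returns (main, aux2, aux1)
logits, aux2, aux1 = net(x)
print(logits.shape, aux2.shape, aux1.shape)  # each torch.Size([2, 1000])

net.eval()  # eval mode: forward returns only the main output
with torch.no_grad():
    print(net(x).shape)  # torch.Size([2, 1000])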
train:
import os
import sys
import json
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torchvision import transforms
import torch.optim as optim
from torchvision.datasets import ImageFolder
from tqdm import tqdm
from model import GoogLeNet
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
# make matplotlib render Chinese characters correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
ROOT_TRAIN = r'E:/cnn/AlexNet/data/train'
ROOT_TEST = r'E:/cnn/AlexNet/data/val'
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
"val": transforms.Compose([transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    train_dataset = ImageFolder(ROOT_TRAIN, transform=data_transform["train"])  # load the training set
    train_num = len(train_dataset)  # number of training images
    animal_list = train_dataset.class_to_idx  # mapping from class name to index
    cla_dict = dict((val, key) for key, val in animal_list.items())  # invert the mapping to index -> class name
    json_str = json.dumps(cla_dict, indent=4)  # serialize the index-to-class mapping; it is written to class_indices.json below
with open('class_indices.json', 'w') as json_file:
json_file.write(json_str)
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size, shuffle=True,
num_workers=0)
    validate_dataset = ImageFolder(ROOT_TEST, transform=data_transform["val"])  # load the validation set
    val_num = len(validate_dataset)  # number of validation images
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=16, shuffle=False,
num_workers=0)
print("using {} images for training, {} images for validation.".format(train_num, val_num)) # 用于打印总的训练集数量和验证集数量
net = GoogLeNet(num_classes=2, aux_logits=True, init_weights=True)
    # To use the official pretrained weights, note that they must be loaded into the official
    # torchvision model, not into our own implementation: the official model uses BN layers
    # and slightly different parameters, so the two cannot be mixed.
    # import torchvision
    # net = torchvision.models.googlenet(num_classes=5)
    # model_dict = net.state_dict()
    # # pretrained weights: https://download.pytorch.org/models/googlenet-1378be20.pth
    # pretrain_model = torch.load("googlenet.pth")
    # del_list = ["aux1.fc2.weight", "aux1.fc2.bias",
    #             "aux2.fc2.weight", "aux2.fc2.bias",
    #             "fc.weight", "fc.bias"]
    # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}
    # model_dict.update(pretrain_dict)
    # net.load_state_dict(model_dict)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0003)
epochs = 10
best_acc = 0.0
save_path = './googleNet.pth'
train_steps = len(train_loader)
for epoch in range(epochs):
# train
        net.train()  # training mode enables the auxiliary classifiers
running_loss = 0.0
train_bar = tqdm(train_loader, file=sys.stdout)
for step, data in enumerate(train_bar):
images, labels = data
optimizer.zero_grad()
logits, aux_logits2, aux_logits1 = net(images.to(device))
            loss0 = loss_function(logits, labels.to(device))  # loss of the main classifier
            loss1 = loss_function(aux_logits1, labels.to(device))  # loss of auxiliary classifier 1
            loss2 = loss_function(aux_logits2, labels.to(device))  # loss of auxiliary classifier 2
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3  # sum the three losses; the auxiliary losses are weighted by 0.3, as in the paper
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
epochs,
loss)
# validate
        net.eval()  # eval mode disables the auxiliary classifiers
        acc = 0.0  # accumulate the number of correct predictions over the epoch
with torch.no_grad():
val_bar = tqdm(validate_loader, file=sys.stdout)
for val_data in val_bar:
val_images, val_labels = val_data
                outputs = net(val_images.to(device))  # in eval mode the network returns only the main output
predict_y = torch.max(outputs, dim=1)[1]
acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
val_accurate = acc / val_num
print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
(epoch + 1, running_loss / train_steps, val_accurate))
if val_accurate > best_acc:
best_acc = val_accurate
torch.save(net.state_dict(), save_path)
print('Finished Training')
if __name__ == '__main__':
main()
predict:
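The original post repeats the training script verbatim under this heading; in its place, below is a minimal prediction sketch. It assumes the weights were saved as ./googleNet.pth and class_indices.json was written by the training script above, and that 'test.jpg' is a placeholder image path to be replaced with your own:

import json
import torch
from PIL import Image
from torchvision import transforms
from model import GoogLeNet

def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    data_transform = transforms.Compose([transforms.Resize((224, 224)),
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # load the image to classify (hypothetical path; replace with your own)
    img = Image.open('test.jpg').convert('RGB')
    img = data_transform(img)
    img = torch.unsqueeze(img, dim=0)  # add a batch dimension: [N, C, H, W]
    # read back the index-to-class mapping written by the training script
    with open('class_indices.json', 'r') as f:
        class_indict = json.load(f)
    # aux_logits=False: the auxiliary classifiers are not needed for inference
    model = GoogLeNet(num_classes=2, aux_logits=False).to(device)
    # strict=False ignores the aux1/aux2 weights stored in the checkpoint
    missing_keys, unexpected_keys = model.load_state_dict(
        torch.load('./googleNet.pth', map_location=device), strict=False)
    model.eval()
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()
    print("class: {}   prob: {:.3f}".format(class_indict[str(predict_cla)],
                                            predict[predict_cla].item()))

if __name__ == '__main__':
    main()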