GoogLeNet Inception V1
Inception的出现理由?
- 为了提升网络的性能,增加网络的宽度和深度,宽度主要是通过融合不同尺度的特征,从而对特征进行不同感受野的提取。卷积核大小采用1、3和5,设定卷积步长stride=1之后,只要分别设定pad=0、1、2,卷积之后便得到相同维度的特征,直接拼接在一起,进入下一层特征的提取。
- 使用1*1卷积核降低网络参数。例如:上一层的输出为100x100x128,经过具有256个输出的5x5卷积层之后(stride=1,pad=2),输出数据为100x100x256。其中,卷积层的参数为128x5x5x256。假如上一层输出先经过具有32个输出的1x1卷积层,再经过具有256个输出的5x5卷积层,那么最终的输出数据仍为100x100x256,但卷积参数量已经减少为128x1x1x32 + 32x5x5x256,大约减少了4倍。
GoogLeNet 结构主要是GoogLeNet Inception V1模块的堆叠
网络的详细参数如图:
典型网络结构解读
输入:对图像进行了零均值化的预处理操作,尺度为224x224x3
第一层卷积,使用7x7x64的卷积核,滑动步长2,padding为3,64通道,输出为112x112x64,卷积后进行ReLU操作,经过3x3的max pooling,步长为2,输出为((112 - 3+1)/2)+1=56,即56x56x64,再进行ReLU操作
第二层卷积层,使用3x3x192的卷积核,滑动步长为1,padding为1,192通道,输出为56x56x192,卷积后进行ReLU操作,经过3x3的max pooling,步长为2,输出为((56 - 3+1)/2)+1=28,即28x28x192,再进行ReLU操作
第三层Inception 3a层
分为四个分支,采用不同尺度的卷积核来进行处理
(1)64个1x1的卷积核,然后ReLU,输出28x28x64
(2)96个1x1的卷积核,作为3x3卷积核之前的降维,变成28x28x96,然后进行ReLU计算,再进行128个3x3的卷积,padding为1,输出28x28x128
(3)16个1x1的卷积核,作为5x5卷积核之前的降维,变成28x28x16,进行ReLU计算后,再进行32个5x5的卷积,padding为2,输出28x28x32
(4)pool层,使用3x3的核,padding为1,输出28x28x192,然后进行32个1x1的卷积,输出28x28x32。
将四个结果进行连接,对这四部分输出结果的第三维并联,即64+128+32+32=256,最终输出28x28x256
为了避免梯度消失和利用底层网络特征进行分类,网络额外增加了2个辅助的softmax作为辅助分类器。辅助分类器是将中间某一层的输出用作分类,并按一个较小的权重(0.3)加到最终分类结果中,这样相当于做了模型融合,同时给网络增加了反向传播的梯度信号,也提供了额外的正则化,对于整个网络的训练很有裨益。而在实际测试的时候,这两个额外的softmax会被去掉。
代码实现:
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicConv2d(nn.Module):
    """A Conv2d layer followed by an in-place ReLU, packaged as one module.

    Args:
        in_channels: number of input feature channels.
        out_channels: number of output feature channels.
        **kwargs: forwarded to nn.Conv2d (kernel_size, stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        # Fixed: original wrote "def__init__" (missing space) — a SyntaxError.
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.relu(x)
        return x
###################Inception模块
class Inception(nn.Module):
    """Inception v1 block: four parallel branches concatenated on channels.

    Branches: 1x1 conv | 1x1 reduce + 3x3 conv | 1x1 reduce + 5x5 conv |
    3x3 max-pool + 1x1 projection. All branches keep the spatial size
    (stride 1, matching padding), so outputs can be concatenated on dim 1.

    Args:
        in_channels: channels of the input tensor.
        ch1x1: output channels of branch 1.
        ch3x3red, ch3x3: reduction and output channels of branch 2.
        ch5x5red, ch5x5: reduction and output channels of branch 3.
        pool_proj: output channels of the pooling branch projection.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        # Fixed: original wrote "def__init__" (missing space) — a SyntaxError.
        super(Inception, self).__init__()
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
        self.branch2 = nn.Sequential(
            BasicConv2d(in_channels, ch3x3red, kernel_size=1),
            BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )
        self.branch3 = nn.Sequential(
            BasicConv2d(in_channels, ch5x5red, kernel_size=1),
            BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, pool_proj, kernel_size=1)
        )

    def forward(self, x):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        outputs = [branch1, branch2, branch3, branch4]
        # Concatenate along the channel dimension.
        return torch.cat(outputs, 1)
################################辅助分类器
class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate Inception stage.

    Used only during training: its logits are added to the main loss with a
    small weight (0.3 in the paper) to strengthen gradient flow.

    Args:
        in_channels: channels of the intermediate feature map.
        num_classes: number of output classes.
    """

    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        # 2048 = 128 channels * 4 * 4 spatial — assumes a 14x14 input map
        # (the 4a/4d taps of GoogLeNet); TODO confirm for other inputs.
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.averagePool(x)
        # Fixed: original wrote "selg.conv" — a NameError at runtime.
        x = self.conv(x)
        x = torch.flatten(x, 1)
        # Fixed: original passed dropout args as "0,5" instead of p=0.5.
        x = F.dropout(x, 0.5, training=self.training)
        x = F.relu(self.fc1(x), inplace=True)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.fc2(x)
        return x
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1).

    In training mode with aux_logits=True, forward returns
    (main_logits, aux2_logits, aux1_logits); otherwise only main_logits.

    Args:
        num_classes: number of output classes.
        aux_logits: build the two auxiliary classifiers (training only).
        init_weights: apply weight initialization after construction.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        # Fixed: original wrote "def__init__" (missing space) — a SyntaxError.
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        # Fixed: original wrote "BaicConv2d" — a NameError.
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        # Fixed: original wrote "ceil_mode=Ture" — a NameError.
        self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)
        if self.aux_logits:
            # Auxiliary classifiers tap the outputs of inception4a / inception4d.
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)
        if init_weights:
            self._initialize()

    def _initialize(self):
        # Fixed: the original called self._initialize() but never defined it
        # (AttributeError when init_weights=True).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Fixed throughout: original referenced an undefined uppercase "X"
        # in many lines, and "self.aus_logits" (typo) in the aux2 branch.
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)
        x = self.inception4a(x)
        if self.training and self.aux_logits:
            aux1 = self.aux1(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        if self.training and self.aux_logits:
            aux2 = self.aux2(x)
        x = self.inception4e(x)
        x = self.maxpool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        if self.training and self.aux_logits:
            return x, aux2, aux1
        return x
训练模块
import os
import torch
import json
import torch.nn as nn
from torchvision import transforms,datasets
import torch.optim as optim
from tqdm import tqdm
from model import GoogLeNet
def main():
    """Train GoogLeNet on the flower dataset, combining the main loss with
    the two auxiliary-classifier losses (weighted 0.3 each), and save the
    best model by validation accuracy."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Fixed: original print had unbalanced quotes and parentheses.
    print("using {} device.".format(device))
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     # Fixed tuple typo: (0.5,0.5.0.5)
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    }  # Fixed: the dict literal was never closed in the original.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_Set", "flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    # Fixed: the original mixed "train_datasets" and "train_dataset".
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)
    # Fixed attribute typo: class_to_idex -> class_to_idx.
    flower_list = train_dataset.class_to_idx
    # Invert the mapping so class indices map back to class names.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Fixed: json.dump writes to a file object; json.dumps returns a string.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)
    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using{} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               shuffle=True, num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    # Fixed typo: validata_dataset.
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=nw)
    net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0003)
    epochs = 30
    best_acc = 0.0
    save_path = './googleNet.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            # Fixed typo: net(image...) -> net(images...).
            logits, aux_logits2, aux_logits1 = net(images.to(device))
            loss0 = loss_function(logits, labels.to(device))
            loss1 = loss_function(aux_logits1, labels.to(device))
            loss2 = loss_function(aux_logits2, labels.to(device))
            # Auxiliary losses weighted 0.3, as in the GoogLeNet paper.
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss)
        net.eval()
        acc = 0.0
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                # Fixed typo: val_imges -> val_images.
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        # Fixed format spec: %.3d -> %.3f (the loss is a float).
        print('[epoch %d] train_loss :%.3f val_accuracy: %.3f' % (epoch + 1,
                                                                  running_loss / train_steps,
                                                                  val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
    print('Finished Training')


# Fixed: original wrote "if__name__" (missing space) — a SyntaxError.
if __name__ == '__main__':
    main()
根据训练的模型对图片进行预测
import os
import json
import torch
from PIL import Image
# Fixed: original wrote "form torchvision import transforms".
from torchvision import transforms
# Fixed: "from matplotlib.pyplot as plt" is invalid syntax.
import matplotlib.pyplot as plt
from model import GoogLeNet


def main():
    """Load the trained GoogLeNet weights and predict the class of one image."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    img_path = "../tulip.jpg"
    # Fixed message typo: "dese not exist" -> "does not exist".
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    img = data_transform(img)
    # Add the batch dimension expected by the network: (1, C, H, W).
    img = torch.unsqueeze(img, dim=0)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    # Fixed: close the JSON file deterministically (original leaked the handle).
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)
    # aux_logits=False: the auxiliary heads are only used during training.
    model = GoogLeNet(num_classes=5, aux_logits=False).to(device)
    weights_path = "./googleNet.pth"
    assert os.path.exists(weights_path), "file:'{}' does not exist.".format(weights_path)
    # strict=False: the checkpoint contains aux-classifier weights absent here.
    missing_keys, unexpected_keys = model.load_state_dict(
        torch.load(weights_path, map_location=device), strict=False)
    model.eval()
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()
    print_res = "class:{} prob:{:.3}".format(class_indict[str(predict_cla)],
                                             predict[predict_cla].numpy())
    # Show the predicted class and its probability.
    plt.title(print_res)
    print(print_res)
    plt.show()


# Fixed: original wrote "if__name__" (missing space) — a SyntaxError.
if __name__ == '__main__':
    main()