# EfficientNet
# Key ideas:
# 1. Compound scaling of network width and depth (and input resolution),
#    built from residual MBConv blocks with shortcut connections and
#    squeeze-and-excitation (SE) modules.
# Standard-library and PyTorch imports for the EfficientNet model definition.
import math  # fixed: was "import match" (nonexistent module); math.ceil is used below
import copy
from functools import partial
from collections import OrderedDict
from typing import Optional, Callable

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F
def _make_divisible(ch,divisor=8,min_ch=None):
if min_ch is None:
min_ch=divisor
new_ch=max(min_ch,int(ch+divisor/2)//divisor* divisor)
if new_ch<0.9*ch:
new_ch+=divisor
return new_ch
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Stochastic depth: randomly zero whole residual paths per sample.

    fixed: parameter was misspelled ``traing`` while the body read
    ``training``; the comparison used an invalid ``===`` operator.

    :param x: input tensor, first dim is the batch
    :param drop_prob: probability of dropping a sample's path
    :param training: apply only during training; identity otherwise
    :return: tensor of the same shape as ``x``
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random value per sample, broadcast over all remaining dims.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with prob keep_prob, else 0
    # Scale kept paths by 1/keep_prob so the expected value is unchanged.
    output = x.div(keep_prob) * random_tensor
    return output
class DropPath(nn.Module):
    """Module wrapper around :func:`drop_path` (stochastic depth).

    fixed: ``def__init__`` was missing the space after ``def``.
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # self.training is managed by nn.Module train()/eval().
        return drop_path(x, self.drop_prob, self.training)
class ConvBNActivation(nn.Sequential):
    """Conv2d -> BatchNorm -> activation, with "same" padding.

    fixed: ``def__init__`` missing space; the Conv2d call passed
    ``group=group`` — neither name exists — instead of ``groups=groups``.
    """

    def __init__(self, in_planes: int, out_planes: int,
                 kernel_size=3, stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        # "same" padding for odd kernel sizes.
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.SiLU
        super(ConvBNActivation, self).__init__(
            nn.Conv2d(in_channels=in_planes,
                      out_channels=out_planes,
                      kernel_size=kernel_size,
                      stride=stride,
                      padding=padding,
                      groups=groups,
                      bias=False),  # bias is redundant before BatchNorm
            norm_layer(out_planes),
            activation_layer())
class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel attention.

    fixed: class statement was misspelled ``SequeezeExcitation`` while
    super() and every caller use ``SqueezeExcitation``; the pooling call
    ``F.adaptive_avg_pool2d(x.output_size=...)`` was invalid syntax; the
    second activation reused ``ac1`` (SiLU) so the Sigmoid gate was never
    applied.

    :param input_c: block input channels (squeeze size is derived from these)
    :param expand_c: channels of the expanded feature map this module gates
    :param squeeze_factor: reduction ratio for the bottleneck (default 4)
    """

    def __init__(self, input_c: int, expand_c: int,
                 squeeze_factor: int = 4):
        super(SqueezeExcitation, self).__init__()
        squeeze_c = input_c // squeeze_factor
        # 1x1 convs act as per-channel fully-connected layers.
        self.fc1 = nn.Conv2d(expand_c, squeeze_c, 1)
        self.ac1 = nn.SiLU()
        self.fc2 = nn.Conv2d(squeeze_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # Squeeze: global average pool to 1x1 per channel.
        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        scale = self.fc1(scale)
        scale = self.ac1(scale)
        scale = self.fc2(scale)
        scale = self.ac2(scale)  # Sigmoid gate in (0, 1)
        # Excite: rescale the input channels.
        return scale * x
class InvertedResidualConfig:
    """Per-block hyper-parameters for one MBConv (inverted residual) block.

    fixed: both ``def__init__`` and ``def adjust_channels`` were written
    without the space after ``def``.

    :param kernel: depthwise kernel size (3 or 5)
    :param input_c: input channels before width scaling
    :param out_c: output channels before width scaling
    :param expanded_ratio: 1x1 expansion ratio (1 or 6)
    :param stride: depthwise conv stride (1 or 2)
    :param use_se: whether to insert a squeeze-and-excitation module
    :param drop_rate: stochastic-depth drop probability for this block
    :param index: block name, e.g. "1a", "2b" — used as the layer key
    :param width_coefficient: compound-scaling width multiplier
    """

    def __init__(self, kernel: int, input_c: int,
                 out_c: int,
                 expanded_ratio: int,
                 stride: int,
                 use_se: bool,
                 drop_rate: float, index: str,
                 width_coefficient: float):
        self.input_c = self.adjust_channels(input_c, width_coefficient)
        self.kernel = kernel
        self.expanded_c = self.input_c * expanded_ratio
        self.out_c = self.adjust_channels(out_c, width_coefficient)
        self.use_se = use_se
        self.stride = stride
        self.drop_rate = drop_rate
        self.index = index

    @staticmethod
    def adjust_channels(channels: int, width_coefficient: float):
        # Scale by the width coefficient, then round to a multiple of 8.
        return _make_divisible(channels * width_coefficient, 8)
class InvertedResidual(nn.Module):
    """MBConv block: 1x1 expand -> depthwise conv -> SE -> 1x1 project,
    with an identity shortcut (plus stochastic depth) when shapes allow.

    fixed: ``layers.updata(`` with a broken dict literal, ``expaned_c``
    typos, wrong keyword names ``norm_layers``/``activation_layers``,
    ``layers.upadata`` and ``self.use_ses_connect`` typos.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super(InvertedResidual, self).__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value")

        # Residual connection only when the block keeps resolution and width.
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        layers = OrderedDict()
        activation_layer = nn.SiLU

        # 1x1 expansion conv (skipped when expand ratio == 1).
        if cnf.expanded_c != cnf.input_c:
            layers.update({"expand_conv": ConvBNActivation(cnf.input_c,
                                                           cnf.expanded_c,
                                                           kernel_size=1,
                                                           norm_layer=norm_layer,
                                                           activation_layer=activation_layer)})

        # Depthwise conv: groups == channels.
        layers.update({"dwconv": ConvBNActivation(cnf.expanded_c,
                                                  cnf.expanded_c,
                                                  kernel_size=cnf.kernel,
                                                  stride=cnf.stride,
                                                  groups=cnf.expanded_c,
                                                  norm_layer=norm_layer,
                                                  activation_layer=activation_layer)})

        if cnf.use_se:
            # Squeeze size derives from input_c; gate acts on expanded_c.
            layers.update({"se": SqueezeExcitation(cnf.input_c,
                                                   cnf.expanded_c)})

        # 1x1 linear projection — Identity activation, as in MobileNetV2.
        layers.update({"project_conv": ConvBNActivation(cnf.expanded_c,
                                                        cnf.out_c,
                                                        kernel_size=1,
                                                        norm_layer=norm_layer,
                                                        activation_layer=nn.Identity)})

        self.block = nn.Sequential(layers)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

        # Stochastic depth is applied only on the residual branch.
        if self.use_res_connect and cnf.drop_rate > 0:
            self.dropout = DropPath(cnf.drop_rate)
        else:
            self.dropout = nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        result = self.block(x)
        result = self.dropout(result)
        if self.use_res_connect:
            result += x
        return result
class EfficientNet(nn.Module):
    """EfficientNet backbone + classifier head.

    ``width_coefficient`` / ``depth_coefficient`` implement compound
    scaling: they widen every stage's channels and deepen every stage's
    repeat count relative to the B0 baseline configuration.

    fixed: ``bneck_conf`` was used but never defined; block index used
    ``str(i + 97)`` instead of ``chr(i + 97)``; ``layers.updata`` typo;
    invalid ``if m.bias if not None`` in the weight init.
    """

    def __init__(self,
                 width_coefficient: float,
                 depth_coefficient: float,
                 num_classes: int = 1000,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super(EfficientNet, self).__init__()

        # Baseline (B0) stage table:
        # kernel, in_c, out_c, expand_ratio, stride, use_se, drop_rate, repeats
        default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1],
                       [3, 16, 24, 6, 2, True, drop_connect_rate, 2],
                       [5, 24, 40, 6, 2, True, drop_connect_rate, 2],
                       [3, 40, 80, 6, 2, True, drop_connect_rate, 3],
                       [5, 80, 112, 6, 1, True, drop_connect_rate, 3],
                       [5, 112, 192, 6, 2, True, drop_connect_rate, 4],
                       [3, 192, 320, 6, 1, True, drop_connect_rate, 1]]

        def round_repeats(repeats):
            # Depth scaling: round the repeat count up.
            return int(math.ceil(depth_coefficient * repeats))

        if block is None:
            block = InvertedResidual
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        adjust_channels = partial(InvertedResidualConfig.adjust_channels,
                                  width_coefficient=width_coefficient)
        # fixed: bneck_conf was referenced below but never bound.
        bneck_conf = partial(InvertedResidualConfig,
                             width_coefficient=width_coefficient)

        b = 0  # running block counter across all stages
        num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf))
        inverted_residual_setting = []
        for stage, args in enumerate(default_cnf):
            cnf = copy.copy(args)
            for i in range(round_repeats(cnf.pop(-1))):
                if i > 0:
                    # Only the first block of a stage uses the configured
                    # stride; the rest keep stride 1 and the out channels.
                    cnf[-3] = 1
                    cnf[1] = cnf[2]
                # Linearly scale stochastic-depth rate over all blocks.
                cnf[-1] = args[-2] * b / num_blocks
                index = str(stage + 1) + chr(i + 97)  # e.g. "1a", "2b"
                inverted_residual_setting.append(bneck_conf(*cnf, index))
                b += 1

        layers = OrderedDict()
        # Stem: 3x3 stride-2 conv from RGB.
        layers.update({"stem_conv": ConvBNActivation(in_planes=3,
                                                     out_planes=adjust_channels(32),
                                                     kernel_size=3,
                                                     stride=2,
                                                     norm_layer=norm_layer)})
        for cnf in inverted_residual_setting:
            layers.update({cnf.index: block(cnf, norm_layer)})

        # Head: 1x1 conv up to (scaled) 1280 channels.
        last_conv_input_c = inverted_residual_setting[-1].out_c
        last_conv_output_c = adjust_channels(1280)
        layers.update({"top": ConvBNActivation(in_planes=last_conv_input_c,
                                               out_planes=last_conv_output_c,
                                               kernel_size=1,
                                               norm_layer=norm_layer)})

        self.features = nn.Sequential(layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        classifier = []
        if dropout_rate > 0:
            classifier.append(nn.Dropout(p=dropout_rate, inplace=True))
        classifier.append(nn.Linear(last_conv_output_c, num_classes))
        self.classifier = nn.Sequential(*classifier)

        # Weight initialization.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:  # fixed: was "if m.bias if not None"
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
def efficientnet_b0(num_classes=1000):
    # input image size 224x224
    return EfficientNet(width_coefficient=1.0,
                        depth_coefficient=1.0,
                        dropout_rate=0.2,
                        num_classes=num_classes)


def efficientnet_b1(num_classes=1000):
    # input image size 240x240
    return EfficientNet(width_coefficient=1.0,
                        depth_coefficient=1.1,
                        dropout_rate=0.2,
                        num_classes=num_classes)


def efficientnet_b2(num_classes=1000):
    # input image size 260x260
    return EfficientNet(width_coefficient=1.1,
                        depth_coefficient=1.2,
                        dropout_rate=0.3,
                        num_classes=num_classes)


def efficientnet_b3(num_classes=1000):
    # input image size 300x300
    return EfficientNet(width_coefficient=1.2,
                        depth_coefficient=1.4,
                        dropout_rate=0.3,
                        num_classes=num_classes)


def efficientnet_b4(num_classes=1000):
    # input image size 380x380
    return EfficientNet(width_coefficient=1.4,
                        depth_coefficient=1.8,
                        dropout_rate=0.4,
                        num_classes=num_classes)


def efficientnet_b5(num_classes=1000):
    # input image size 456x456
    return EfficientNet(width_coefficient=1.6,
                        depth_coefficient=2.2,
                        dropout_rate=0.4,
                        num_classes=num_classes)


def efficientnet_b6(num_classes=1000):
    # input image size 528x528
    return EfficientNet(width_coefficient=1.8,
                        depth_coefficient=2.6,
                        dropout_rate=0.5,
                        num_classes=num_classes)


def efficientnet_b7(num_classes=1000):
    # input image size 600x600
    return EfficientNet(width_coefficient=2.0,
                        depth_coefficient=3.1,
                        dropout_rate=0.5,
                        num_classes=num_classes)
# Custom dataset: reads images from user-supplied path/label lists;
# the split into training and validation sets is done by the utilities below.
from PIL import Image
import torch
from torch.utils.data import Dataset
class MyDataset(Dataset):
    """Dataset over explicit lists of image paths and their class indices.

    fixed: missing closing paren in the ValueError call, ``lable`` typo
    in ``__getitem__``, and ``lables``/``labels`` name mismatch in
    ``collate_fn`` (both were NameErrors at runtime).
    """

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        # Reject non-RGB images early with a clear message.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    @staticmethod
    def collate_fn(batch):
        # Unzip [(img, label), ...] into a batched image tensor and a label tensor.
        images, labels = tuple(zip(*batch))
        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
# Utilities: read the image dataset and split it into training/validation
# sets, plus the per-epoch training and evaluation loops used by train.py.
# Imports for the dataset-split and train/evaluate utilities.
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm
import matplotlib.pyplot as plt  # fixed: was "matplotlib.pylot" (typo)
def read_split_data(root: str, val_rate: float = 0.2):
    """Scan ``root`` (one sub-directory per class) and split image paths
    into training and validation lists.

    Also writes the index -> class-name mapping to ``class_indices.json``
    in the current working directory.

    fixed: ``json.dump`` -> ``json.dumps`` (dump needs a file object),
    ``os.path.json`` -> ``os.path.join`` (twice), ``images_class`` vs
    ``image_class`` name mismatch, and typos in the returned names.

    :param root: dataset root directory
    :param val_rate: fraction of each class sampled for validation
    :return: (train_paths, train_labels, val_paths, val_labels)
    """
    random.seed(0)  # reproducible split across runs
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Each sub-directory of root is one class.
    flower_class = [cla for cla in os.listdir(root)
                    if os.path.isdir(os.path.join(root, cla))]
    flower_class.sort()
    class_indices = dict((k, v) for v, k in enumerate(flower_class))

    # Persist index -> class-name mapping for later visualization.
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()),
                          indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []
    train_images_label = []
    val_images_path = []
    val_images_label = []
    every_class_num = []
    supported = [".jpg", ".JPG", ".png", ".PNG"]

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Randomly sample val_rate of each class for validation.
        val_path = random.sample(images, k=int(len(images) * val_rate))
        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    plot_image = False
    if plot_image:
        # Optional bar chart of the class distribution.
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        plt.xticks(range(len(flower_class)), flower_class)
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
def plot_data_loader_image(data_loader):
    """Show up to 4 denormalized images per batch with their class names.

    Requires ``class_indices.json`` written by :func:`read_split_data`.

    fixed: the file was opened with ``open(json_str, ...)`` instead of
    ``json_path``; ``images, label = data`` did not match the later
    ``labels[i]``; the denormalization line had unbalanced parentheses.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        for i in range(plot_num):
            # CHW tensor -> HWC array, then undo Normalize([0.5]*3, [0.5]*3).
            img = images[i].numpy().transpose(1, 2, 0)
            img = (img * [0.5, 0.5, 0.5] + [0.5, 0.5, 0.5]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i + 1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])
            plt.yticks([])
            plt.imshow(img.astype('uint8'))
        plt.show()
def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` to ``file_name``.

    fixed: file mode was ``'WB'`` — an invalid mode string that raises
    ``ValueError``; must be lowercase ``'wb'``.
    """
    with open(file_name, 'wb') as f:
        pickle.dump(list_info, f)
def read_pickle(file_name: str) -> list:
    """Load and return the pickled object stored at ``file_name``."""
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)
def train_one_epoch(model, data_loader, optimizer, device, epoch):
    """Run one training epoch and return the running mean loss.

    Performs one optimizer step per batch and aborts the process if a
    non-finite loss is encountered.

    :param model: network to train (switched to train mode here)
    :param data_loader: yields (images, labels) batches
    :param optimizer: optimizer whose step/zero_grad is driven per batch
    :param device: device the batches are moved to
    :param epoch: epoch number, used only for the progress-bar text
    :return: running mean loss over the epoch (float)
    """
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    mean_loss = torch.zeros(1).to(device)
    optimizer.zero_grad()
    data_loader = tqdm(data_loader)  # wrap for a live progress bar
    for step, data in enumerate(data_loader):
        images, labels = data
        pred = model(images.to(device))
        loss = loss_function(pred, labels.to(device))
        loss.backward()
        # Running mean over the steps seen so far (detached: no graph kept).
        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)
        data_loader.desc = "[epoch{}] mean loss {}".format(epoch, round(mean_loss.item(), 3))
        # Bail out instead of propagating NaN/Inf through the weights.
        if not torch.isfinite(loss):
            print('WARNING:non-finite loss,ending training', loss)
            sys.exit(1)
        optimizer.step()
        optimizer.zero_grad()
    return mean_loss.item()
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return top-1 accuracy of ``model`` over ``data_loader``.

    fixed: the tqdm wrapper was assigned to the typo name ``dat_loader``
    (so the progress bar never updated), and the accumulator was written
    as ``sum_sum`` — a NameError on the first batch.
    """
    model.eval()
    total_num = len(data_loader.dataset)
    sum_num = torch.zeros(1).to(device)  # correct-prediction counter
    data_loader = tqdm(data_loader)
    for step, data in enumerate(data_loader):
        images, labels = data
        pred = model(images.to(device))
        pred = torch.max(pred, dim=1)[1]  # argmax over class logits
        sum_num += torch.eq(pred, labels.to(device)).sum()
    return sum_num.item() / total_num
# Training script: argument parsing, data loading, model creation,
# optional weight loading / layer freezing, and the train/eval loop.
# Imports for the training entry point.
import os
import math
import argparse

import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms  # fixed: was "torchvison"

from model import efficientnet_b0 as create_model  # fixed: was "cerate_model"
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate
def main(args):
    """Train EfficientNet-B0 on the flower dataset described by ``args``.

    fixed (among others): ``is_availabel`` typo, unterminated print string,
    ``./weight`` vs ``./weights`` mismatch, missing ToTensor in the val
    transform and the ``0.e`` typo, unbalanced brackets in data_transform,
    ``MydataSet`` vs the imported ``MyDataSet``, ``nm``/``nw`` mismatch,
    ``num_worker`` kwarg, ``weight_dict`` vs ``weights_dict``,
    ``FileNotFondError``, lambda missing ``:``, ``add_scaler`` ->
    ``add_scalar`` and the ``.path`` checkpoint extension.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs"')
    tb_writer = SummaryWriter()
    if os.path.exists("./weights") is False:
        os.makedirs("./weights")

    train_images_path, train_images_label, val_images_path, val_images_label = \
        read_split_data(args.data_path)

    # Recommended input resolution per EfficientNet variant.
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model]),
                                   transforms.CenterCrop(img_size[num_model]),
                                   transforms.ToTensor(),  # fixed: was missing before Normalize
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # Number of dataloader workers: bounded by CPUs, batch size and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    # Optionally warm-start from pretrained weights, keeping only tensors
    # whose element count matches the current model (head may differ).
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if model.state_dict()[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze everything except the top conv and the classifier.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            if ("features.top" not in name) and ("classifier" not in name):
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)
    # Cosine annealing from lr down to lr * lrf over args.epochs.
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        scheduler.step()

        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))

        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
if __name__ == '__main__':
    # Command-line interface; defaults match the flower-photos tutorial setup.
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)
    parser.add_argument('--data-path', type=str, default="/data/flower_photos")
    # fixed: mismatched quote characters and "." instead of "," before help.
    parser.add_argument('--weights', type=str, default='./efficientnetb0.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=bool, default=False)
    parser.add_argument('--device', default='cuda:0',
                        help='device id (i.e. 0 or 0,1 or cpu)')
    opt = parser.parse_args()  # fixed: was parse_arg()
    main(opt)