计算机视觉篇---图像分类实战+理论讲解(8)EfficientNet

EfficientNet
创新点
1.可宽度深度伸缩 res+shortcut+se+扩大
在这里插入图片描述

import math  # fix: was "import match", which does not exist and breaks round_repeats()
import copy
from functools import partial
from collections import OrderedDict
from typing import Optional, Callable

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F
def _make_divisible(ch,divisor=8,min_ch=None):
     if min_ch is None:
         min_ch=divisor
     new_ch=max(min_ch,int(ch+divisor/2)//divisor* divisor)
     if new_ch<0.9*ch:
         new_ch+=divisor
     return new_ch
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Stochastic Depth: randomly zero whole samples of a residual branch.

    Fixes vs. original: parameter was misspelled ``traing`` while the body
    read ``training`` (NameError), and the comparison used ``===`` (syntax
    error).

    Args:
        x: input tensor, first dimension is the batch.
        drop_prob: probability of dropping a sample's residual path.
        training: apply the drop only in training mode.

    Returns:
        Tensor of the same shape; kept samples are rescaled by 1/keep_prob
        so the expected value is unchanged.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # one random value per sample, broadcast over all remaining dims
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with prob keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output
class DropPath(nn.Module):
    """Module wrapper around :func:`drop_path` (Stochastic Depth).

    Fix vs. original: ``def__init__`` was missing the space after ``def``.
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob  # probability of dropping the residual path

    def forward(self, x):
        # self.training toggles the drop automatically with train()/eval()
        return drop_path(x, self.drop_prob, self.training)
class ConvBNActivation(nn.Sequential):
    """Conv2d -> BatchNorm -> activation, with "same" padding.

    Fixes vs. original: ``def__init__`` missing space, the body was not
    indented inside ``__init__``, and the Conv2d call used the non-existent
    keyword ``group=group`` instead of ``groups=groups``.
    """

    def __init__(self, in_planes: int, out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        # "same" padding for odd kernel sizes
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.SiLU  # a.k.a. Swish, EfficientNet's default
        super(ConvBNActivation, self).__init__(
            nn.Conv2d(in_channels=in_planes,
                      out_channels=out_planes,
                      kernel_size=kernel_size,
                      stride=stride,
                      padding=padding,
                      groups=groups,
                      bias=False),  # BN right after makes the bias redundant
            norm_layer(out_planes),
            activation_layer())
class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel-attention block.

    Fixes vs. original: the class was declared as ``SequeezeExcitation``
    but referenced everywhere else (including its own ``super()`` call) as
    ``SqueezeExcitation``; the pooling call was
    ``F.adaptive_avg_pool2d(x.output_size=(1,1))`` (syntax error); and the
    second activation reused ``self.ac1`` instead of the sigmoid ``self.ac2``.

    Args:
        input_c: channel count of the block's un-expanded input; the squeeze
            width is derived from it.
        expand_c: channel count of the tensor actually passed to forward().
        squeeze_factor: reduction ratio for the bottleneck FC (default 4).
    """

    def __init__(self, input_c: int, expand_c: int,
                 squeeze_factor: int = 4):
        super(SqueezeExcitation, self).__init__()
        squeeze_c = input_c // squeeze_factor
        # 1x1 convs act as per-channel fully connected layers
        self.fc1 = nn.Conv2d(expand_c, squeeze_c, 1)
        self.ac1 = nn.SiLU()
        self.fc2 = nn.Conv2d(squeeze_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # squeeze: global spatial average -> (N, C, 1, 1)
        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        scale = self.fc1(scale)
        scale = self.ac1(scale)
        scale = self.fc2(scale)
        scale = self.ac2(scale)
        # excitation: per-channel gating of the input
        return scale * x

class InvertedResidualConfig:
    """Hyper-parameter bundle for one MBConv (inverted residual) block.

    Channel counts are scaled by ``width_coefficient`` and rounded with
    :func:`_make_divisible`. Fix vs. original: ``def__init__`` was missing
    the space after ``def``.

    Args:
        kernel: depthwise conv kernel size (3 or 5).
        input_c: unscaled input channels.
        out_c: unscaled output channels.
        expanded_ratio: expansion factor for the 1x1 expand conv (1 or 6).
        stride: depthwise conv stride (1 or 2).
        use_se: whether to insert a Squeeze-Excitation block.
        drop_rate: stochastic-depth drop probability for this block.
        index: human-readable block id such as "1a", "2b".
        width_coefficient: global width multiplier.
    """

    def __init__(self, kernel: int, input_c: int,
                 out_c: int,
                 expanded_ratio: int,
                 stride: int,
                 use_se: bool,
                 drop_rate: float, index: str,
                 width_coefficient: float):
        self.input_c = self.adjust_channels(input_c, width_coefficient)
        self.kernel = kernel
        self.expanded_c = self.input_c * expanded_ratio
        self.out_c = self.adjust_channels(out_c, width_coefficient)
        self.use_se = use_se
        self.stride = stride
        self.drop_rate = drop_rate
        self.index = index

    @staticmethod
    def adjust_channels(channels: int, width_coefficient: float):
        # scale then round to a multiple of 8
        return _make_divisible(channels * width_coefficient, 8)

class InvertedResidual(nn.Module):
    """MBConv block: expand (1x1) -> depthwise -> SE -> project (1x1).

    Fixes vs. original: ``layers.updata``/``upadata`` typos, a missing ``{``
    in the expand_conv update, ``expaned_c`` misspelling, wrong keyword
    names (``norm_layers``/``activation_layers``), the dwconv/SE/project
    layers and the dropout branch were mis-indented so they fell outside
    the intended control flow, and ``use_ses_connect`` misspelling.
    """

    def __init__(self, cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super(InvertedResidual, self).__init__()
        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value")
        # residual shortcut only when shape is preserved
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)
        layers = OrderedDict()
        activation_layer = nn.SiLU
        # 1x1 expansion conv (skipped when expand ratio == 1)
        if cnf.expanded_c != cnf.input_c:
            layers.update({"expand_conv": ConvBNActivation(cnf.input_c,
                                                           cnf.expanded_c,
                                                           kernel_size=1,
                                                           norm_layer=norm_layer,
                                                           activation_layer=activation_layer)})
        # depthwise conv: groups == channels
        layers.update({"dwconv": ConvBNActivation(cnf.expanded_c,
                                                  cnf.expanded_c,
                                                  kernel_size=cnf.kernel,
                                                  stride=cnf.stride,
                                                  groups=cnf.expanded_c,
                                                  norm_layer=norm_layer,
                                                  activation_layer=activation_layer)})
        if cnf.use_se:
            layers.update({"se": SqueezeExcitation(cnf.input_c, cnf.expanded_c)})
        # 1x1 projection conv, linear (no activation)
        layers.update({"project_conv": ConvBNActivation(cnf.expanded_c,
                                                        cnf.out_c,
                                                        kernel_size=1,
                                                        norm_layer=norm_layer,
                                                        activation_layer=nn.Identity)})
        self.block = nn.Sequential(layers)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1
        # stochastic depth only makes sense on the residual branch
        if self.use_res_connect and cnf.drop_rate > 0:
            self.dropout = DropPath(cnf.drop_rate)
        else:
            self.dropout = nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        result = self.block(x)
        result = self.dropout(result)
        if self.use_res_connect:
            result += x
        return result
class EfficientNet(nn.Module):
    """EfficientNet backbone (Tan & Le, 2019) built from MBConv stages.

    Fixes vs. original: ``bneck_conf`` was used but never defined; the block
    index used ``str(i + 97)`` instead of ``chr(i + 97)``; ``layers.updata``
    typo; ``if m.bias if not None`` syntax error and the BatchNorm/Linear
    init branches were nested inside the Conv2d branch; the classifier's
    ``Linear`` was only created when ``dropout_rate > 0``; and
    ``_forward_impl``/``forward`` were mis-indented outside the class body.

    Args:
        width_coefficient: channel-width multiplier.
        depth_coefficient: per-stage repeat multiplier.
        num_classes: classifier output size.
        dropout_rate: dropout before the final Linear.
        drop_connect_rate: max stochastic-depth rate (scaled per block).
        block: block class, defaults to :class:`InvertedResidual`.
        norm_layer: norm constructor, defaults to BatchNorm2d(eps=1e-3).
    """

    def __init__(self,
                 width_coefficient: float,
                 depth_coefficient: float,
                 num_classes: int = 1000,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super(EfficientNet, self).__init__()
        # B0 baseline: kernel, in_c, out_c, exp_ratio, stride, use_se, drop_rate, repeats
        default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1],
                       [3, 16, 24, 6, 2, True, drop_connect_rate, 2],
                       [5, 24, 40, 6, 2, True, drop_connect_rate, 2],
                       [3, 40, 80, 6, 2, True, drop_connect_rate, 3],
                       [5, 80, 112, 6, 1, True, drop_connect_rate, 3],
                       [5, 112, 192, 6, 2, True, drop_connect_rate, 4],
                       [3, 192, 320, 6, 1, True, drop_connect_rate, 1]]

        def round_repeats(repeats):
            # depth scaling: always round the repeat count up
            return int(math.ceil(depth_coefficient * repeats))

        if block is None:
            block = InvertedResidual
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)
        adjust_channels = partial(InvertedResidualConfig.adjust_channels,
                                  width_coefficient=width_coefficient)
        # fix: bneck_conf was referenced below but never defined
        bneck_conf = partial(InvertedResidualConfig,
                             width_coefficient=width_coefficient)

        b = 0  # running block counter, used to ramp the drop rate
        num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf))
        inverted_residual_setting = []
        for stage, args in enumerate(default_cnf):
            cnf = copy.copy(args)
            for i in range(round_repeats(cnf.pop(-1))):
                if i > 0:
                    # only the first block of a stage uses the configured stride
                    cnf[-3] = 1
                    cnf[1] = cnf[2]  # repeats keep in_channels == out_channels
                # drop rate grows linearly with block depth
                cnf[-1] = args[-2] * b / num_blocks
                index = str(stage + 1) + chr(i + 97)  # "1a", "2a", "2b", ...
                inverted_residual_setting.append(bneck_conf(*cnf, index))
                b += 1

        layers = OrderedDict()
        layers.update({"stem_conv": ConvBNActivation(in_planes=3,
                                                     out_planes=adjust_channels(32),
                                                     kernel_size=3,
                                                     stride=2,
                                                     norm_layer=norm_layer)})
        for cnf in inverted_residual_setting:
            layers.update({cnf.index: block(cnf, norm_layer)})
        last_conv_input_c = inverted_residual_setting[-1].out_c
        last_conv_output_c = adjust_channels(1280)
        layers.update({"top": ConvBNActivation(in_planes=last_conv_input_c,
                                               out_planes=last_conv_output_c,
                                               kernel_size=1,
                                               norm_layer=norm_layer)})
        self.features = nn.Sequential(layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        classifier = []
        if dropout_rate > 0:
            classifier.append(nn.Dropout(p=dropout_rate, inplace=True))
        # fix: Linear must exist even when dropout_rate == 0
        classifier.append(nn.Linear(last_conv_output_c, num_classes))
        self.classifier = nn.Sequential(*classifier)

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
def efficientnet_b0(num_classes=1000):
    """EfficientNet-B0 (input image size 224x224)."""
    return EfficientNet(width_coefficient=1.0,
                        depth_coefficient=1.0,
                        dropout_rate=0.2,
                        num_classes=num_classes)

def efficientnet_b1(num_classes=1000):
    """EfficientNet-B1 (input image size 240x240)."""
    return EfficientNet(width_coefficient=1.0,
                        depth_coefficient=1.1,
                        dropout_rate=0.2,
                        num_classes=num_classes)


def efficientnet_b2(num_classes=1000):
    """EfficientNet-B2 (input image size 260x260)."""
    return EfficientNet(width_coefficient=1.1,
                        depth_coefficient=1.2,
                        dropout_rate=0.3,
                        num_classes=num_classes)


def efficientnet_b3(num_classes=1000):
    """EfficientNet-B3 (input image size 300x300)."""
    return EfficientNet(width_coefficient=1.2,
                        depth_coefficient=1.4,
                        dropout_rate=0.3,
                        num_classes=num_classes)


def efficientnet_b4(num_classes=1000):
    """EfficientNet-B4 (input image size 380x380)."""
    return EfficientNet(width_coefficient=1.4,
                        depth_coefficient=1.8,
                        dropout_rate=0.4,
                        num_classes=num_classes)


def efficientnet_b5(num_classes=1000):
    """EfficientNet-B5 (input image size 456x456)."""
    return EfficientNet(width_coefficient=1.6,
                        depth_coefficient=2.2,
                        dropout_rate=0.4,
                        num_classes=num_classes)


def efficientnet_b6(num_classes=1000):
    """EfficientNet-B6 (input image size 528x528)."""
    return EfficientNet(width_coefficient=1.8,
                        depth_coefficient=2.6,
                        dropout_rate=0.5,
                        num_classes=num_classes)


def efficientnet_b7(num_classes=1000):
    """EfficientNet-B7 (input image size 600x600)."""
    return EfficientNet(width_coefficient=2.0,
                        depth_coefficient=3.1,
                        dropout_rate=0.5,
                        num_classes=num_classes)
                
                  


数据使用自定义的图片进行读取,之后进行分训练集,验证集

from PIL import Image
import torch
from torch.utils.data import Dataset
class MyDataset(Dataset):
    """Image-classification dataset over explicit path/label lists.

    Fixes vs. original: missing closing ``)`` in the RGB-mode check,
    ``return img, lable`` typo, and ``collate_fn`` unpacked into ``lables``
    while converting ``labels`` (NameError).

    Args:
        images_path: list of image file paths.
        images_class: list of integer class labels, parallel to images_path.
        transform: optional callable applied to each PIL image.
    """

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        img = Image.open(self.images_path[item])
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    @staticmethod
    def collate_fn(batch):
        """Stack per-sample (img, label) pairs into batch tensors."""
        images, labels = tuple(zip(*batch))
        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
    

读取图像 分训练集 验证
训练
验证

import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm
import matplotlib.pyplot as plt  # fix: was "matplotlib.pylot" (ModuleNotFoundError)
def read_split_data(root: str, val_rate: float = 0.2):
    """Scan a one-folder-per-class image dataset and split it train/val.

    Writes the class-name -> index mapping to ``class_indices.json`` in the
    current directory as a side effect.

    Fixes vs. original: ``os.path.json`` -> ``os.path.join``; ``json.dump``
    -> ``json.dumps``; the per-class label variable was assigned as
    ``images_class`` but read as ``image_class``; the summary prints and the
    ``return`` were indented inside the class loop; and the returned names
    ``val_iamges_path``/``val_images_lable`` were misspelled.

    Args:
        root: dataset root; each subdirectory is one class.
        val_rate: fraction of each class sampled into the validation split.

    Returns:
        (train_images_path, train_images_label,
         val_images_path, val_images_label)
    """
    random.seed(0)  # reproducible split
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)
    flower_class = [cla for cla in os.listdir(root)
                    if os.path.isdir(os.path.join(root, cla))]
    flower_class.sort()  # stable class -> index assignment
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()),
                          indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []
    train_images_label = []
    val_images_path = []
    val_images_label = []
    every_class_num = []
    supported = [".jpg", ".JPG", ".png", ".PNG"]
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # per-class sampling keeps the split stratified
        val_path = random.sample(images, k=int(len(images) * val_rate))
        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    plot_image = False
    if plot_image:
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        plt.xticks(range(len(flower_class)), flower_class)
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
def plot_data_loader_image(data_loader):
    """Show up to 4 de-normalized images per batch with their class names.

    Fixes vs. original: the JSON file was opened with ``open(json_str, ...)``
    instead of ``json_path``; the de-normalization line was missing its
    closing ``)``; and the batch was unpacked as ``label`` but indexed as
    ``labels[i]``.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    json_file = open(json_path, 'r')
    class_indices = json.load(json_file)
    for data in data_loader:
        images, labels = data
        for i in range(plot_num):
            # CHW -> HWC for matplotlib
            img = images[i].numpy().transpose(1, 2, 0)
            # undo Normalize(mean=0.5, std=0.5), back to [0, 255]
            img = (img * [0.5, 0.5, 0.5] + [0.5, 0.5, 0.5]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i + 1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])
            plt.yticks([])
            plt.imshow(img.astype('uint8'))
        plt.show()
def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` to ``file_name`` with pickle.

    Fix vs. original: file mode was ``'WB'``, which is not a valid mode
    string (raises ValueError); must be lowercase ``'wb'``.
    """
    with open(file_name, 'wb') as f:
        pickle.dump(list_info, f)
def read_pickle(file_name: str) -> list:
    """Load and return the pickled object stored in ``file_name``."""
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)
def train_one_epoch(model, data_loader, optimizer, device, epoch):
    """Train ``model`` for one epoch and return the running mean loss.

    The tqdm progress bar's description shows the mean loss so far; training
    aborts the whole process if a non-finite loss is observed.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    mean_loss = torch.zeros(1).to(device)
    optimizer.zero_grad()
    data_loader = tqdm(data_loader)
    for step, (images, labels) in enumerate(data_loader):
        pred = model(images.to(device))
        loss = criterion(pred, labels.to(device))
        loss.backward()
        # incremental running mean over the steps seen so far
        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)
        data_loader.desc = "[epoch{}] mean loss {}".format(epoch, round(mean_loss.item(), 3))
        if not torch.isfinite(loss):
            print('WARNING:non-finite loss,ending training', loss)
            sys.exit(1)
        optimizer.step()
        optimizer.zero_grad()
    return mean_loss.item()
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return top-1 accuracy of ``model`` over ``data_loader``.

    Fixes vs. original: the tqdm wrapper was assigned to a misspelled,
    unused name ``dat_loader`` (so no progress bar was shown), and the
    accumulator was incremented as ``sum_sum`` instead of ``sum_num``
    (NameError at runtime).
    """
    model.eval()
    total_num = len(data_loader.dataset)
    sum_num = torch.zeros(1).to(device)  # count of correct predictions
    data_loader = tqdm(data_loader)
    for step, data in enumerate(data_loader):
        images, labels = data
        pred = model(images.to(device))
        pred = torch.max(pred, dim=1)[1]  # argmax over classes
        sum_num += torch.eq(pred, labels.to(device)).sum()
    return sum_num.item() / total_num

训练

import os
import math
import argparse

import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms  # fix: was "torchvison"

from model import efficientnet_b0 as create_model  # fix: was "cerate_model"
from my_dataset import MyDataset  # fix: was "MyDataSet" (class is named MyDataset)
from utils import read_split_data, train_one_epoch, evaluate
def main(args):
    """Train an EfficientNet-B0 classifier on the dataset in ``args.data_path``.

    Fixes vs. original: ``torch.cuda.is_availabel`` typo; unterminated
    string in the Tensorboard print; checked ``./weight`` but created
    ``./weights``; the val transform was missing ``ToTensor()`` and both
    Compose lists were missing closing brackets/commas; ``0.e`` in the val
    Normalize mean; dataset class spelled ``MydataSet``; worker count
    assigned to ``nm`` but used as ``nw`` (and computed wrongly); DataLoader
    keyword ``num_worker``; ``weight_dict`` vs ``weights_dict`` mismatch;
    the ``else`` of the weights check mis-indented under the ``if``;
    ``FileNotFondError``; malformed cosine-schedule lambda; misspelled
    ``add_scaler``; checkpoint suffix ``.path`` instead of ``.pth``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs"')
    tb_writer = SummaryWriter()
    if os.path.exists("./weights") is False:
        os.makedirs("./weights")
    train_images_path, train_images_label, val_images_path, val_images_label = \
        read_split_data(args.data_path)

    # official input resolution for each EfficientNet variant
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model]),
                                   transforms.CenterCrop(img_size[num_model]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    train_dataset = MyDataset(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])
    val_dataset = MyDataset(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # cap workers at CPU count, batch size, and 8
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            # keep only parameters whose shapes match (drops the old classifier)
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if model.state_dict()[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file:{}".format(args.weights))

    if args.freeze_layers:
        # fine-tune only the final conv stage and the classifier head
        for name, para in model.named_parameters():
            if ("features.top" not in name) and ("classifier" not in name):
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)
    # cosine-annealing schedule from lr down to lr*lrf over args.epochs
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        scheduler.step()
        acc = evaluate(model=model, data_loader=val_loader, device=device)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
if __name__ == '__main__':
    # Fixes vs. original: mismatched quotes and '.help' in the --weights
    # argument, 'defaule' typo, unterminated --device help string, and
    # parser.parse_arg() -> parse_args().
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)
    parser.add_argument('--data-path', type=str, default="/data/flower_photos")
    parser.add_argument('--weights', type=str, default='./efficientnetb0.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=bool, default=False)
    parser.add_argument('--device', default='cuda:0',
                        help='device id (i.e. 0 or 0,1 or cpu)')
    opt = parser.parse_args()
    main(opt)
       
    



    
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
### 回答1: 国科大计算机视觉考试是考核计算机视觉领域基础知识和技术能力的一种考试形式。该考试主要针对计算机视觉领域的学习者和研究人员,希望在该领域获得证书或深入研究该领域,以增强个人能力和市场竞争优势。 该考试主要涵盖了计算机视觉领域的基础理论、图像处理、计算机视觉模型及应用等方面的知识点。考试内容包括单选、多选和简答题等多种形式,考试难度逐步递进,旨在全面测试考生的知识水平和解决问题的能力。 参加国科大计算机视觉考试需要具备相关领域的基础知识和技术能力,并具备一定的实践经验。同时,考试成绩也是进一步深入学习计算机视觉相关课程和项目的门槛。 总之,国科大计算机视觉考试是一个全面测试考生在计算机视觉领域基础理论和实践能力的考试,可以对学习者和研究人员在这一领域的进一步深入学习和发展提供有力支持。 ### 回答2: 据了解,国科大计算机视觉考试是国家计算机视觉领域的重要考试之一,针对此领域的研究人才进行选拔和培养。计算机视觉是一门交叉学科,它使用计算机技术来模拟人类视觉系统,实现对图像和视频的感知、理解和分析,具有广泛的应用前景。 该考试主要涉及到计算机视觉的相关理论知识和技术应用,如图像处理、模式识别、机器学习、深度学习等方面。考生需要掌握基本的数学和编程知识,熟悉计算机视觉的主要算法和工具,具备实际开发和应用能力。 参加国科大计算机视觉考试需要有相关专业背景和一定的实践经验,具备综合素质和创新能力,能够独立思考和解决实际问题。考生的总成绩将综合评定笔试成绩、面试表现以及综合素质等因素,选拔最具潜力和实力的优秀人才。 国科大计算机视觉考试的考核标准和要求比较高,但是通过考试可以进入国家重点实验室或知名企业,开展计算机视觉领域的研究和应用工作,获得广阔的发展空间和职业前景。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

free_girl_fang

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值