fastai2 实现SSD

_helen_520

已于 2023-05-06 11:19:37 修改

阅读量756

点赞数 1

文章标签：深度学习计算机视觉人工智能

于 2023-05-04 19:59:18 首次发布

本文链接：https://blog.csdn.net/haronchou/article/details/130480794

版权

文章详细介绍了在fastai2框架下实现SSD目标检测模型时遇到的问题，包括锚点（anchor）转换、模型分割（split）和冻结（freeze）的陷阱，以及损失函数的正确应用。作者提供了代码示例来解决这些问题，并分享了数据预处理、bbox范围和转换、损失计算等方面的要点。

摘要由CSDN通过智能技术生成

https://github.com/search?q=fastai+ssd 有几个值得参考的代码，好好学习。
- GitHub - Samjoel3101/SSD-Object-Detection: I am working on a SSD Object Detector using fastai and pytorch fastai2实现的SSD，终于找到了code。
- https://github.com/sidravic/SSD_ObjectDetection_2/tree/master/train 这也是fastai2实现的ssd
很重要的参考：mAP的参考，基于fastai2的结构：GitHub - rbrtwlz/fastai_object_detection: Extension of the fastai library to include object detection.
- 计划是将这个mAP的计算同SSD结合起来就好了。
fastai2的SSD，来自：dhblog - Object Detection from scratch - Single Shot Detector
- 这个获取data的方式帮助很大
备注大坑：anchor
- 由于fastai ssd的原始代码来自fastai 2018 part2课程，当时的pascal_multi.ipynb使用的是fastai 0.7，所以bbox的范围是[0,1]，其anchor也是[0,1]；但是fastai1和fastai2的bbox范围都是[-1,1]，所以anchor以及计算IoU时用到的anchor也要转换到[-1,1]，loss才是对的。jav0927的代码中有详细的说明，这里是个大坑：https://nbviewer.org/github/jav0927/course-v3/blob/master/SSD_Object_Detection.ipynb

1. 关于模型split和freeze的大坑

① 只给了m.body=resnet34的params
② 给了所有的参数

①和②的差别很大，主要在于构建优化器Optimizer的时候：①中Adam的构造函数只把m.body的params拿到了Optimizer中，之后的step、grad更新等都只作用于Adam里面的这些参数，这是一个致命的问题。所以两者最后的loss有很大的区别，①训练不起来。

head的部分的参数都没有进入优化器，相当于没有训练，难怪loss都训练不起来。
所有的Body参数长度只有108；所以Optimizer里面就是没有head的参数，压根就没更新
去看了head层的conv2d的weight参数，证实了猜想，其值压根就没有改变。

错误的方法②：来自jav0927：https://github.com/jav0927/course-v3/blob/master/Fastai2_SSD_Model.ipynb

2. others

fastai2和fastai1的bbox都是：x1,y1,x2,y2格式；用plt显示框（patches.Rectangle）时使用的是x,y,w,h格式
fastai2的bbox范围是[-1,1]；显示到224需要变换：
for i,ax in enumerate(axes.flat): # y~[-1,1] ([-1,1] + 1)/2~[0,1]
    show_ground_truth(ax, x[i], ((y[0][i] + 1)/2 * 224).cpu(), y[1][i].cpu())
def draw_rect(ax, b, color='white'):
    patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2))

" 使用fastai v2 重写ssd by fastai course-v2 2018 part2 pascal_multi.ipynb "

# data pascal_voc2007


import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.insert(0, '/home/zhr/fastai2/fastai_object_detection/fastai_object_detection') # debug源码，而非package

from pathlib import Path
from fastai.vision.all import *
# from zhr_util import get_annotations
from zhr_util import ssd_loss, SSD_Head, SSD_MultiHead, FocalLoss

# Root folder of the Pascal VOC 2007 dataset (machine-specific absolute path).
path = Path('/home/helen/dataset/pascal_2007')

# Load file names and [bboxes, labels] targets from the COCO-style json files.
# NOTE(review): the `get_annotations` import above is commented out, so the
# function must be brought into scope some other way before this runs.
trn_im_names, trn_truths = get_annotations(path/'train.json')
val_im_names, val_truths = get_annotations(path/'valid.json')
# tst_im_names, tst_truths = get_annotations(path/'test.json') 
# Merge the train and valid annotations into one pool; the split is redone
# below by `RandomSplitter`.
tot_im_names, tot_truths = [trn_im_names + val_im_names, trn_truths + val_truths]

# file name -> [bboxes, labels]
img_y_dict = dict(zip(tot_im_names, tot_truths))
# NOTE(review): `truth_data_func` is not referenced anywhere in the visible code.
truth_data_func = lambda o: img_y_dict[o.name]

sz=224       # Image size
bs=64        # Batch size

item_tfms = [Resize(sz, method='squish'),]
batch_tfms = [Rotate(), Flip(), Dihedral()]

# One getter per block: image path, bounding boxes, box labels.
# NOTE(review): every image (including those listed in valid.json) is looked
# up under path/'train' — confirm that is where the files live.
getters = [lambda o: path/'train'/o, lambda o: img_y_dict[o][0], lambda o: img_y_dict[o][1]]

# n_inp=1: the image is the only model input; boxes and labels are targets.
pascal = DataBlock(blocks=(ImageBlock, BBoxBlock, BBoxLblBlock),
                   splitter=RandomSplitter(),
                   getters=getters,
                   item_tfms=item_tfms,
                   batch_tfms=batch_tfms,
                   n_inp=1)
dls = pascal.dataloaders(tot_im_names,bs=bs)
# dls.vocab

# Number of anchor scales per grid cell, passed to the head together with the
# initial classification bias (-3.).
# NOTE(review): presumably must equal len(anchor_scales) used by the loss — confirm.
k = 9
head_reg4 = SSD_MultiHead(k, -3., dls)
body = create_body(resnet34(True))
model = nn.Sequential(body, head_reg4)

# Train body and head together with the SSD loss for 3 epochs at lr 1e-3.
ssd_learner = Learner(dls, model, loss_func=ssd_loss)
ssd_learner.fit_one_cycle(3, 1e-3)

import json
import collections
from fastai.vision.all import *

def get_annotations(fname, prefix=None):
    """Parse a COCO-style annotation json into file names and labelled boxes.

    Args:
        fname: Path of a json file with `categories`, `annotations` and
            `images` lists.
        prefix: Optional string prepended to every returned file name.
            `None` (the default) means no prefix.

    Returns:
        A pair `(filenames, targets)` of equal length. `targets[i]` is
        `[bboxes, labels]` for `filenames[i]`, where each bbox is
        `[x1, y1, x2, y2]` (converted from the json's `[x, y, w, h]`) and each
        label is the category name. Images without annotations are skipped.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(fname, encoding='utf-8') as f:
        annot_dict = json.load(f)
    prefix = '' if prefix is None else prefix

    classes = {o['id']: o['name'] for o in annot_dict['categories']}

    id2bboxes = collections.defaultdict(list)
    id2cats = collections.defaultdict(list)
    for o in annot_dict['annotations']:
        bb = o['bbox']
        # [x, y, w, h] -> [x1, y1, x2, y2]
        id2bboxes[o['image_id']].append([bb[0], bb[1], bb[2] + bb[0], bb[3] + bb[1]])
        id2cats[o['image_id']].append(classes[o['category_id']])

    # Only keep images that have at least one annotation.
    id2images = {o['id']: prefix + o['file_name']
                 for o in annot_dict['images'] if o['id'] in id2bboxes}
    ids = list(id2images)
    return [id2images[i] for i in ids], [[id2bboxes[i], id2cats[i]] for i in ids]

" 多类别的标签：fastai v2版本的使用方法 "
# if 0:
    # df = pd.read_csv(path/'train.csv')

    # def get_x(r): return path/'train'/r['fname']
    # def get_y(r): return r['labels'].split(' ')

    # # dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
    # #                    get_x = get_x, get_y = get_y)
    # # dsets = dblock.datasets(df)

    # def splitter(df):
    #     train = df.index[~df['is_valid']].tolist()
    #     valid = df.index[df['is_valid']].tolist()
    #     return train,valid

    # dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
    #                 splitter=splitter,
    #                 get_x=get_x, 
    #                 get_y=get_y,
    #                 item_tfms = RandomResizedCrop(224, min_scale=0.35))
    # dls = dblock.dataloaders(df)
        
    # dls.show_batch(max_n=9, figsize=(8, 6))
__all__ = ['get_ssd_model','ssd_resnet34', 'ssd_loss']

# Cell
import torch
from torch import nn
from torch.nn import Module
from torchvision.ops.boxes import batched_nms
from torch.hub import load_state_dict_from_url
from functools import partial
from fastai.vision.all import delegates

from fastai.vision import *
from fastai.callback import *

from fastai.vision import models
from fastai.vision.learner import create_body
from fastai.callback.hook import num_features_model
from fastai.layers import *

import torch.nn.functional as F

# Method used to match the shape of the conv_ssd_layer to the ground truth's shape
def flatten_conv(x, k):
    """Flatten a conv feature map into one row of activations per anchor.

    Args:
        x: 4-D tensor `(batch, channels, grid_x, grid_y)`.
        k: Number of anchors per grid cell; `channels` is split into `k` groups.

    Returns:
        A tensor of shape `(batch, -1, channels // k)`: the channel axis is
        moved last and the grid cells are then unrolled.
    """
    batch, channels, _, _ = x.size()
    channels_last = x.permute(0, 2, 3, 1).contiguous()
    return channels_last.view(batch, -1, channels // k)

# SSD output layer: predicts box activations and class scores for every anchor
class OutConv(nn.Module):
    """Output layers of the SSD head.

    `oconv1` produces `len(dls.vocab) * k` class channels and `oconv2`
    produces `4 * k` box channels, both with 3x3 kernels and padding 1.

    Args:
        k: Number of anchors per grid cell.
        nin: Number of input channels.
        bias: Value every class-conv bias is initialised to.
        dls: Object exposing `vocab`; only its length is used.
    """
    def __init__(self, k, nin, bias, dls):
        super().__init__()
        self.k = k
        n_classes = len(dls.vocab)
        # The class conv is created first so seeded initialisation is unchanged.
        self.oconv1 = nn.Conv2d(nin, n_classes * k, kernel_size=3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4 * k, kernel_size=3, padding=1)
        # Reset the classification bias to the requested constant.
        self.oconv1.bias.data.zero_().add_(bias)

    def forward(self, x):
        """Return `[boxes, classes]` — boxes first, then labels."""
        boxes = flatten_conv(self.oconv2(x), self.k)
        classes = flatten_conv(self.oconv1(x), self.k)
        return [boxes, classes]
    
# Standard convolution block; the default stride of 2 downsamples the feature map
class StdConv(nn.Module):
    """3x3 convolution followed by ReLU, batch norm and dropout (in that order).

    Args:
        nin: Input channels.
        nout: Output channels.
        stride: Convolution stride (default 2).
        drop: Dropout probability (default 0.1).
    """
    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, kernel_size=3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        activated = F.relu(self.conv(x))
        normalised = self.bn(activated)
        return self.drop(normalised)

# Default compute device for module-level tensors: CUDA when available, else CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class SSD_Head(nn.Module):
    """Single-scale SSD head applied to a 512-channel feature map.

    Pipeline: ReLU + dropout(0.25) -> StdConv(512->256, stride 1)
    -> StdConv(256->256, stride 2) -> OutConv.

    Args:
        k: Number of anchors per grid cell (forwarded to `OutConv`).
        bias: Initial classification bias (forwarded to `OutConv`).
        dls: Data loaders object (forwarded to `OutConv`).
    """
    def __init__(self, k, bias, dls):
        super().__init__()
        self.drop = nn.Dropout(0.25)
        self.sconv0 = StdConv(512, 256, stride=1)
        self.sconv2 = StdConv(256, 256)
        self.out = OutConv(k, 256, bias, dls)

    def forward(self, x):
        """Return whatever `OutConv` returns for the reduced feature map."""
        feats = self.drop(F.relu(x))
        for block in (self.sconv0, self.sconv2):
            feats = block(feats)
        return self.out(feats)



def one_hot_embedding(labels, num_classes):
    """Return a float one-hot encoding of `labels` with `num_classes` columns.

    The identity matrix is created on the same device as `labels`, so the
    function works on CPU-only machines and the index never crosses devices.
    Non-tensor `labels` fall back to CUDA when available, otherwise CPU.

    Args:
        labels: Integer class indices (tensor, or anything usable as an index).
        num_classes: Size of the one-hot dimension.

    Returns:
        A tensor of shape `labels.shape + (num_classes,)`.
    """
    if torch.is_tensor(labels):
        target_device = labels.device
    else:
        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.eye(num_classes, device=target_device)[labels]

# It is more efficient to build the one-hot target directly on the GPU.
class BCE_Loss(nn.Module):
    """Summed binary cross-entropy over one-hot targets, ignoring background.

    Column 0 (background) is dropped from both the one-hot target and the
    predictions before the loss is computed. The summed loss is divided by
    `num_classes`.

    Args:
        num_classes: Number of classes including the background at index 0.
    """
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, pred, targ):
        """Compute the loss for logits `pred` and integer class ids `targ`."""
        t = one_hot_embedding(targ.squeeze(), self.num_classes)
        t = t[:, 1:]  # Start from 1 to exclude the Background
        x = pred[:, 1:]
        w = self.get_weight(x, t)
        # The base class returns no weights; only detach when a tensor is given
        # (calling `.detach()` on None raised AttributeError).
        if w is not None:
            w = w.detach()
        return F.binary_cross_entropy_with_logits(x, t, w, reduction='sum') / self.num_classes

    def get_weight(self, x, t):
        """Per-element loss weights; `None` means unweighted. Subclasses override."""
        return None

class FocalLoss(BCE_Loss):
    """BCE loss with focal-loss weights (alpha = 0.25, gamma = 1)."""
    def get_weight(self, x, t):
        """Return `alpha_t * (1 - p_t) ** gamma` for logits `x` and 0/1 targets `t`."""
        alpha, gamma = 0.25, 1
        prob = x.sigmoid()
        negatives = 1 - t
        # Probability the model assigns to the target value of each element.
        p_correct = prob * t + (1 - prob) * negatives
        # alpha for positive elements, (1 - alpha) for negative ones.
        balance = alpha * t + (1 - alpha) * negatives
        return balance * (1 - p_correct).pow(gamma)

#convert center/height/width to fastai top left and bottom right coordinates
def cthw2corners(boxes):
    """Convert `(N, 4)` boxes from centre/size to corner format.

    Columns 0-1 are the centre and columns 2-3 the sizes along the same two
    axes; the result is `[c0 - s0/2, c1 - s1/2, c0 + s0/2, c1 + s1/2]`.
    """
    centre = boxes[:, :2]
    half_size = boxes[:, 2:4] / 2
    return torch.cat([centre - half_size, centre + half_size], dim=1)
def hw2corners(ctr, hw):
    """Convert centres `ctr` and sizes `hw` (both `(N, 2)`) to `(N, 4)` corners."""
    half = hw / 2
    lower, upper = ctr - half, ctr + half
    return torch.cat([lower, upper], dim=1)
# Filter out all zero-valued bounding boxes
def un_pad(boxes, labels):
    """Drop padding rows: keep only boxes whose column-2 minus column-0 extent is positive.

    Returns the filtered `(boxes, labels)` pair, indexed with the same rows.
    """
    extent = boxes[:, 2] - boxes[:, 0]
    keep_rows = torch.nonzero(extent > 0)[:, 0]
    kept_boxes = boxes[keep_rows]
    kept_labels = labels[keep_rows]
    return kept_boxes, kept_labels

# Calculate the area of a bounding box
def box_area(boxes):
    """Return the area of each corner-format box in an `(N, 4)` tensor."""
    extent_0 = boxes[:, 2] - boxes[:, 0]
    extent_1 = boxes[:, 3] - boxes[:, 1]
    return extent_0 * extent_1

# Calculate the intersection of two given bounding boxes
def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-format boxes.

    Args:
        box_a: `(A, 4)` tensor.
        box_b: `(B, 4)` tensor.

    Returns:
        An `(A, B)` tensor; non-overlapping pairs give 0. Both inputs must be
        2-D tensors.
    """
    a = box_a.unsqueeze(1)  # (A, 1, 4)
    b = box_b.unsqueeze(0)  # (1, B, 4)
    lower = torch.max(a[..., :2], b[..., :2])
    upper = torch.min(a[..., 2:], b[..., 2:])
    # Negative extents mean no overlap along that axis.
    overlap = (upper - lower).clamp(min=0)
    return overlap[..., 0] * overlap[..., 1]

# Calculate Jaccard (IOU)
def iou(bbox, anchor):
    """Jaccard overlap (IoU) between ground-truth boxes and anchor boxes.

    Args:
        bbox: Ground-truth boxes in corner format, `(N, 4)` or a single `(4,)` box.
        anchor: Anchor boxes in corner format, `(M, 4)`.

    Returns:
        An `(N, M)` tensor of intersection-over-union values.
    """
    if bbox.dim() == 1:
        bbox = bbox.unsqueeze(0)
    overlap = intersect(bbox, anchor)
    # Broadcast the per-box areas to the (N, M) grid of pairs.
    gt_area = box_area(bbox).unsqueeze(dim=1)
    anchor_area = box_area(anchor).unsqueeze(dim=0)
    return overlap / (gt_area + anchor_area - overlap)
# Transform activations to bounding box format
def act_to_bbox(activation, anchor):
    """Turn raw box activations into corner-format boxes relative to anchors.

    Activations are squashed with `tanh` to (-1, 1). The centre may move by
    up to half a grid cell (`grid_sizes`) and each size is scaled by a factor
    in (0.5, 1.5).

    Args:
        activation: `(N, 4)` raw outputs; columns 0-1 shift the centre,
            columns 2-3 scale the size.
        anchor: `(N, 4)` anchors in centre/size format.

    Returns:
        `(N, 4)` boxes in corner format on `activation`'s device.
    """
    activation = torch.tanh(activation)  # force scale to be -1,1
    # Follow the activation's device for both the anchors and the grid sizes;
    # previously the anchors went to the global `device`, which can differ.
    anchor = anchor.to(activation.device)
    cell = grid_sizes.float().to(activation.device)
    act_center = anchor[:, :2] + (activation[:, :2] / 2 * cell)
    act_hw = anchor[:, 2:] * (activation[:, 2:] / 2 + 1)
    # hw2corners gives the same result as cthw2corners on the concatenated
    # tensor and is faster.
    return hw2corners(act_center, act_hw)

  # Map to Ground Truth
def map_to_gt(overlaps):
    """Assign one ground-truth object to every anchor.

    Args:
        overlaps: `(num_gt, num_anchors)` IoU matrix.

    Returns:
        `(overlap, gt_index)`, each of length `num_anchors`. Every anchor gets
        its best-overlapping object; in addition, the best anchor of each
        object is forced onto that object and its overlap is set to 4.99 so it
        passes any threshold.
    """
    _, best_anchor_of_gt = overlaps.max(dim=1)
    anchor_overlap, anchor_gt = overlaps.max(dim=0)
    anchor_overlap[best_anchor_of_gt] = 4.99
    # Sequential on purpose: when two objects share a best anchor, the later
    # object wins.
    for gt_i, anchor_i in enumerate(best_anchor_of_gt.tolist()):
        anchor_gt[anchor_i] = gt_i
    return anchor_overlap, anchor_gt

class SSD_MultiHead(nn.Module):
    """Multi-scale SSD head producing predictions at three successive scales.

    The 512-channel input goes through ReLU + dropout and a stride-1
    `StdConv`, then three stride-2 `StdConv` blocks. After each stride-2
    block an `OutConv` emits box and class activations, which are
    concatenated along the anchor axis.

    Args:
        k: Number of anchors per grid cell.
        bias: Initial classification bias for every `OutConv`.
        dls: Data loaders object forwarded to `OutConv`.
        drop: Dropout probability used throughout the head.
    """
    def __init__(self, k, bias, dls, drop=0.4):
        super().__init__()
        self.drop = nn.Dropout(drop)
        self.sconv0 = StdConv(512, 256, stride=1, drop=drop)
        self.sconv1 = StdConv(256, 256, drop=drop)
        self.sconv2 = StdConv(256, 256, drop=drop)
        self.sconv3 = StdConv(256, 256, drop=drop)
        # `out0` is not used by `forward`; it is kept so that parameter names
        # and initialisation order stay unchanged.
        self.out0 = OutConv(k, 256, bias, dls)
        self.out1 = OutConv(k, 256, bias, dls)
        self.out2 = OutConv(k, 256, bias, dls)
        self.out3 = OutConv(k, 256, bias, dls)

    def forward(self, x):
        """Return `[boxes, classes]`, each concatenated over the three scales."""
        feats = self.sconv0(self.drop(F.relu(x)))
        stages = ((self.sconv1, self.out1),
                  (self.sconv2, self.out2),
                  (self.sconv3, self.out3))
        boxes, classes = [], []
        for downsample, predict in stages:
            feats = downsample(feats)
            stage_boxes, stage_classes = predict(feats)
            boxes.append(stage_boxes)
            classes.append(stage_classes)
        return [torch.cat(boxes, dim=1),    # box
                torch.cat(classes, dim=1)]  # clas






# Anchor configuration: three square grids (4x4, 2x2, 1x1), three zoom levels
# and three aspect ratios.
anc_grids = [4, 2, 1]
anc_zooms = [0.75, 1., 1.3]
anc_ratios = [(1., 1.), (1., 0.5), (0.5, 1.)]

# Every zoom/ratio combination -> 9 (scale_0, scale_1) pairs per grid cell.
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms 
                                    for (i,j) in anc_ratios]
# *** Number of Anchor Scales
k = len(anchor_scales)
# ***************************

import numpy as np
# Distance from the border to the first cell centre: half a cell, where a cell
# is 2/ag wide because coordinates span (-1, 1).
anc_offsets = [2/(o*2) for o in anc_grids] #2 is the h,w in fastai 1.0 (-1,1)
# Cell-centre coordinates for all grids, concatenated (16 + 4 + 1 = 21 cells).
# NOTE(review): the columns are named x,y here while `cthw2corners` labels
# column 0 as "top" — confirm the intended axis order.
anc_x = np.concatenate([np.repeat(np.linspace(ao-1, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao-1, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
# Each cell centre is repeated k times, once per anchor scale.
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)
# Anchor sizes: the cell size (2/ag) multiplied by each scale pair.
anc_sizes = np.concatenate([np.array([[2*o/ag,2*p/ag] 
            for i in range(ag*ag) for o,p in anchor_scales])
                for ag in anc_grids]) #2/grid * scale,2 is the h,w in fastai 1.0
# Cell size (2/ag) of the grid each anchor belongs to, as a column vector.
grid_sizes = torch.tensor(np.concatenate([np.array([ 1/ag 
            for i in range(ag*ag) for o,p in anchor_scales])
                for ag in anc_grids])).unsqueeze(1) *2 #again fastai 1.0 h,w is 2
# Anchors in centre/size format (one row per anchor) and their corner-format
# counterpart, both moved to the default device.
anchors = torch.tensor(np.concatenate([anc_ctrs, anc_sizes], axis=1)).float()
anchor_cnr = cthw2corners(anchors)  
anchors = anchors.to(device)
anchor_cnr = anchor_cnr.to(device)
# Custom SSD model
class SSDModel(Module):
    """SSD detector: a pretrained backbone followed by a three-scale head.

    Args:
        arch: Callable that builds the backbone; it is called as `arch(True)`.
        k: Number of anchors per grid cell.
        drop: Dropout probability passed to the `conv2_std_layer` blocks.
        no_cls: Number of classes passed to the `conv2_ssd_layer` blocks.
    """
    def __init__(self, arch=models.resnet34, k=9, drop=0.4, no_cls=21):
        super().__init__()
        self.k = k

        self.body = create_body(arch(True))
        # Second name for the same module.
        self.backbone = self.body
        self.drop = nn.Dropout(0.2)
        body_features = num_features_model(self.body)
        self.std_conv_0 = conv2_std_layer(body_features, 256, drop=drop, stride=1)
        # Dimension-reducing layers
        self.std_conv_1 = conv2_std_layer(256, 256, drop=drop, stride=2)  # 4 by 4 layer
        self.std_conv_2 = conv2_std_layer(256, 256, drop=drop, stride=2)  # 2 by 2 layer
        self.std_conv_3 = conv2_std_layer(256, 256, drop=drop, stride=2)  # 1 by 1 layer
        # Output layers, one per scale
        self.ssd_conv_1 = conv2_ssd_layer(256, k=self.k, no_cls=no_cls)
        self.ssd_conv_2 = conv2_ssd_layer(256, k=self.k, no_cls=no_cls)
        self.ssd_conv_3 = conv2_ssd_layer(256, k=self.k, no_cls=no_cls)

        # No criterion is attached to the model; the loss is computed outside.
        self.device = device
        self.anchors = anchors

    def forward(self, *x):
        """Return `[boxes, classes]` for the images in `x[0]`.

        Any second positional argument (targets) is accepted but ignored: the
        model always returns raw predictions. The in-model loss (training) and
        post-processing (validation) branches are disabled.
        """
        imgs = x[0]
        feats = self.std_conv_0(self.drop(F.relu(self.body(imgs))))
        stages = ((self.std_conv_1, self.ssd_conv_1),   # 4 x 4
                  (self.std_conv_2, self.ssd_conv_2),   # 2 x 2
                  (self.std_conv_3, self.ssd_conv_3))   # 1 x 1
        boxes, classes = [], []
        for downsample, predict in stages:
            feats = downsample(feats)
            stage_boxes, stage_classes = predict(feats)
            boxes.append(stage_boxes)
            classes.append(stage_classes)
        preds = [torch.cat(boxes, dim=1),
                 torch.cat(classes, dim=1)]
        return preds

    def postprocess(self, x, anchors, preds):
        """Placeholder for prediction post-processing; currently returns None."""
        return None
# Module-level classification criterion used by `ssd_1_loss`.
# NOTE(review): the class count is hard-coded to 21 — presumably 20 Pascal VOC
# classes plus background; confirm it matches len(dls.vocab).
loss_f = FocalLoss(21)

def ssd_1_loss(b_c,b_bb,bbox,clas,print_it=False):
    """Compute the SSD loss for a single image.

    Args:
        b_c: Class activations, one row per anchor.
        b_bb: Box activations, one row per anchor.
        bbox: Ground-truth boxes (corner format), possibly zero-padded.
        clas: Ground-truth class ids aligned with `bbox`.
        print_it: Unused here; kept for interface compatibility.

    Returns:
        `(loc_loss, clas_loss)`: the mean L1 distance between predicted and
        matched ground-truth boxes over positive anchors, and the
        classification loss from `loss_f` over all anchors.
    """
    bbox, clas = un_pad(bbox, clas)
    if bbox.shape[0] == 0:
        # No ground-truth box left after removing padding: matching would
        # reduce over an empty dimension and raise. Treat every anchor as
        # background and contribute no localisation loss.
        gt_clas = torch.zeros(b_c.shape[0], dtype=torch.long, device=b_c.device)
        return TensorBase(b_bb.new_zeros(())), loss_f(b_c, gt_clas)
    # Convert activations to boxes first (relative to the centre/size anchors).
    a_ic = act_to_bbox(b_bb, anchors)
    overlaps = iou(bbox.data, anchor_cnr.data)
    gt_overlap, gt_idx = map_to_gt(overlaps)  # matched object for every anchor
    gt_clas = clas[gt_idx]
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:, 0]
    gt_clas[~pos] = 0  # anchors below the threshold become background
    gt_bbox = bbox[gt_idx]
    # Cast to TensorBase so tensors of different fastai subclasses can be combined.
    pred_pos = TensorBase(a_ic[TensorBase(pos_idx)])
    gt_pos = TensorBase(gt_bbox[TensorBase(pos_idx)])
    loc_loss = (pred_pos - gt_pos).abs().mean()
    clas_loss = loss_f(b_c, gt_clas)
    return loc_loss, clas_loss


def ssd_loss(pred, *targ, print_it=False):
    """Batch SSD loss: sum of localisation and classification losses.

    Args:
        pred: Model output; unpacked as `(box_activations, class_activations)`.
        *targ: Targets; unpacked as `(bboxes, classes)`.
        print_it: When true, print the two accumulated loss components.

    Returns:
        The localisation loss plus the classification loss, summed over the
        images of the batch.
    """
    lls, lcs = 0., 0.
    for sample in zip(*pred, *targ):
        b_bb, b_c, bbox, clas = sample
        sample_loc, sample_clas = ssd_1_loss(b_c, b_bb, bbox, clas, print_it)
        lls += sample_loc
        lcs += sample_clas
    if print_it:
        print(f'loc: {lls.data}, clas: {lcs.data}')
    return lls + lcs
 
  
    
@delegates(SSDModel)
def get_ssd_model(arch_str, num_classes, pretrained=True, pretrained_backbone=True,
                   trainable_layers=5, **kwargs):
    """Build an `SSDModel`.

    Args:
        arch_str: Backbone constructor, passed to `SSDModel` as `arch`.
        num_classes: Number of classes, passed to `SSDModel` as `no_cls`.
        pretrained, pretrained_backbone, trainable_layers: Accepted for
            signature compatibility; not used by this function.
        **kwargs: Extra `SSDModel` arguments (e.g. `k`, `drop`). They were
            previously dropped silently and are now forwarded.

    Returns:
        The constructed `SSDModel`.
    """
    # The explicit arguments win over same-named entries in kwargs, so `arch`
    # and `no_cls` are resolved exactly as before.
    model_kwargs = {**kwargs, 'arch': arch_str, 'no_cls': num_classes}
    model = SSDModel(**model_kwargs)
    return model


# ResNet-34 variant of `get_ssd_model`. `arch_str` is bound by keyword, so the
# remaining arguments (e.g. `num_classes`) must also be passed by keyword;
# a positional argument would collide with `arch_str`.
ssd_resnet34 = partial(get_ssd_model, arch_str=models.resnet34)