fastai 目标检测

_helen_520

已于 2022-11-18 18:18:02 修改

阅读量707

点赞数

分类专栏： fastai学习笔记　文章标签： python

于 2022-10-12 14:21:09 首次发布

本文链接：https://blog.csdn.net/haronchou/article/details/127281937

版权

fastai学习笔记专栏收录该内容

20 篇文章 7 订阅

订阅专栏

fastai论坛中的目标检测内容：

Overwhelmed by the sheer number of posts and replies. What can I do?
- Focus on the lectures and a project that is based on the lectures. 论坛是一个很好的资源，但只有在您着手解决自己的问题时它才真正有帮助。重要的不是阅读长线程中的每条消息，而是使用右上角的搜索功能来查找对您有帮助的信息。
- Don’t treat the forum like your ‘Facebook’ feed by routinely scroll through the posts and replies 不要像读社交软件的推送一样来阅读forum。
- I am not too sure if this is a symptom of FOMO (Fear Of Missing Out) 害怕错过些啥的心理。我认为有一些做法可以帮助我们克服 FOMO。
- I learn to navigate the forum from noise-to-signal. I think there are many redundant threads and threads that are supposed to be in a better category.
- 学会从noise-to-signal来导航论坛上的内容。论坛上还有很多冗余的threads，以及有些线程应该在别的更好的分类中。
- 也许 Summarize This Topic会有用。
https://forums.fast.ai/t/whoah-what-happened-to-these-forums/6906
- radek的帖子：

soft question - On "familiarity" (or How to avoid "going down the Math Rabbit Hole"?) - Mathematics Stack Exchange

John von Neumann allegedly once said "Young man, in mathematics you don't understand things. You just get used to them." I
- 冯诺依曼说：年轻人，在数学中你并不是理解了事物，你只是习惯了它们。（颢语录）
关于兔子洞问题的讨论：

一、fastai计算mAP值的程序段

def nms(boxes, scores, overlap=0.5, top_k=100):
    """Greedy non-maximum suppression over a set of scored boxes.

    Repeatedly keeps the highest-scoring remaining box and drops every other
    remaining box whose IoU with it is greater than ``overlap``.

    Args:
        boxes: tensor indexed as ``boxes[:, 0..3]``; columns 0/1 are treated
            as the lower corner and columns 2/3 as the upper corner.
        scores: 1-D tensor with one score per box.
        overlap: IoU threshold; boxes with IoU <= ``overlap`` survive a round.
        top_k: only the ``top_k`` highest-scoring boxes are considered.

    Returns:
        ``(keep, count)`` where ``keep`` is a long tensor of length
        ``scores.size(0)`` whose first ``count`` entries are the indices of
        the kept boxes (highest score first); the remaining entries are zero.
        For empty input ``count`` is 0.
    """
    keep = scores.new_zeros(scores.size(0), dtype=torch.long)
    if boxes.numel() == 0:
        # Return the same (keep, count) pair as the normal path so callers
        # can always unpack two values.
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    _, idx = scores.sort(0)  # ascending, so the best candidate is last
    idx = idx[-top_k:]       # indices of the top_k largest scores

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of the current highest score
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # drop the box that was just kept

        # Corners of the intersection between box i and each remaining box.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])

        # Negative extents mean the boxes do not intersect.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        # IoU = inter / (area(a) + area(b) - inter)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter / union

        # Keep only the candidates that do not overlap box i too much.
        idx = idx[IoU.le(overlap)]
    return keep, count
import numpy as np
# from fastai import *
# from fastai.callbacks import *
# The md / model part is hard to set up, so it is put directly into the code.
# `anchors`, `device` and `sz` are expected to be defined before this point.
anchors = anchors.to(device)
# NOTE(review): the original comment labels this "batch_size", but get_y
# divides box coordinates by `size`, which suggests an image size - confirm.
size = sz # batch_size

# Class names; the evaluation code uses range(len(id2cats)) as the class ids.
id2cats=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

def get_y(bbox, clas):
    """Scale boxes by the global ``size`` and drop degenerate entries.

    ``bbox`` is reshaped to rows of four values and divided by ``size``.
    Only rows where column 2 is strictly greater than column 0 are kept;
    the same rows are selected from ``clas``.

    Returns:
        ``(boxes, classes)`` restricted to the kept rows.
    """
    scaled = bbox.view(-1, 4) / size
    extent = scaled[:, 2] - scaled[:, 0]
    # Rows with zero extent are presumably padding - TODO confirm with caller.
    kept_rows = (extent > 0.).nonzero()[:, 0]
    return scaled[kept_rows], clas[kept_rows]
def get1preds(b_clas,b_bb,bbox,clas,thresh=0.25):
    """Turn one image's raw outputs into per-class NMS-filtered detections.

    For every class row except the last one of the sigmoid-activated,
    transposed ``b_clas``, anchors scoring above ``thresh`` are selected and
    passed through ``nms`` (overlap 0.4, top 50).

    Args:
        b_clas: class activations; after ``.t()`` each row is one class.
        b_bb: box activations, decoded with ``act_to_bbox`` and the global
            ``anchors``.
        bbox, clas: targets; they are un-padded here but not used afterwards.
        thresh: minimum confidence for an anchor to be considered.

    Returns:
        ``(scores, boxes, class_ids)``; each element is ``None`` when no
        detection survives.
    """
    bbox, clas = un_pad(bbox, clas)
    a_ic = act_to_bbox(b_bb, anchors)
    clas_pr, clas_ids = b_clas.max(1)  # computed but not used below
    conf_scores = b_clas.sigmoid().t().data

    kept_scores, kept_boxes, kept_classes = [], [], []
    # The last class row is skipped (presumably background - TODO confirm).
    n_rows = conf_scores.size(0) - 1
    for cl in range(n_rows):
        above = conf_scores[cl] > thresh
        if above.sum() == 0:
            continue
        cl_scores = conf_scores[cl][above]
        # Broadcast the per-anchor mask over the four box coordinates.
        box_mask = above.unsqueeze(1).expand_as(a_ic)
        cl_boxes = a_ic[box_mask].view(-1, 4)
        ids, n_kept = nms(cl_boxes.data, cl_scores, 0.4, 50)
        ids = ids[:n_kept]
        kept_scores.append(cl_scores[ids])
        kept_boxes.append(cl_boxes.data[ids])
        kept_classes.append([cl] * n_kept)

    cc = T(np.concatenate(kept_classes)) if kept_classes else None
    out1 = torch.cat(kept_scores) if kept_scores else None
    out2 = torch.cat(kept_boxes) if kept_boxes else None
    return out1, out2, cc

def count(L):
    """Tally how often each element occurs in ``L``.

    Returns a ``defaultdict(int)`` mapping element -> occurrences, so looking
    up an element that never occurred yields 0. ``None`` is treated as an
    empty collection.
    """
    tally = collections.defaultdict(int)
    if L is None:
        return tally
    for item in L:
        tally[item] = tally[item] + 1
    return tally

from ipywidgets import FloatProgress
from IPython.display import display

def multiTPFPFN(md:ImageDataBunch):
    """Accumulate per-class TP/FP/FN counts over ``md.valid_dl``.

    Counts are collected at 40 score thresholds evenly spaced from 0.05 to
    0.95. Predictions come from the global ``learn.model``; a progress bar is
    displayed and advanced once per batch.

    Returns:
        ``(tps, fps, fns)``: three arrays of shape ``(40, len(id2cats))``
        indexed as ``[threshold, class]``.
    """
    n = 40
    threshes = np.linspace(.05, 0.95, n, endpoint=True)
    tps,fps,fns = np.zeros((n,len(id2cats))),np.zeros((n,len(id2cats))),np.zeros((n,len(id2cats)))
    prog = FloatProgress(min=0,max=len(md.valid_dl))
    display(prog)
    for data in md.valid_dl:
        x,y = data
#         x,y = V(x),V(y)
        pred = learn.model(x)#model predictions for the batch
        for idx in range(x.size(0)):
            bbox,clas = un_pad(y[0][idx],y[1][idx])#unpad the target
#             import pdb
#             pdb.set_trace()
            # Decode predictions once at the lowest threshold; the higher
            # thresholds are applied to the scores in the loop below.
            # NOTE(review): get1preds returns None for all three outputs when
            # nothing passes the threshold; jaccard/to_np would then receive
            # None - confirm this cannot happen or guard against it.
            p_scrs,p_box,p_cls = get1preds(pred[1][idx],pred[0][idx],bbox,clas,threshes[0])
            # Pairwise overlap: rows are predictions, columns are targets.
            overlaps = to_np(jaccard(p_box,bbox.data))
            # Overlaps that are not above 0.5 are zeroed out.
            overlaps = np.where(overlaps > 0.5, overlaps, 0)
#             import pdb
#             pdb.set_trace()
            clas, np_scrs, np_cls = to_np(clas.data),to_np(p_scrs), to_np(p_cls)
            for k in range(threshes.shape[0]):
                new_tp = collections.defaultdict(int)
                for cls in list(set(clas)):
                    # True where the prediction (row) and the target (column)
                    # both have class `cls`.
                    msk_clas = np.bitwise_and((clas == cls)[None,:],(np_cls == cls)[:,None])
                    ov_clas = np.where(msk_clas,overlaps,0.)
                    # For each prediction, the target with the largest overlap.
                    mx_idx = np.argmax(ov_clas,axis=1)
                    for i in range(0,len(clas)):
                        if (clas[i] == cls):
                            # Target i is a true positive if some prediction
                            # scoring above the threshold has a non-zero
                            # overlap and picks target i as its best match.
                            keep = np.bitwise_and(np.max(ov_clas,axis=1) > 0.,mx_idx==i)
                            keep = np.bitwise_and(keep,np_scrs > threshes[k])
                            if keep.sum() > 0:
                                new_tp[cls] += 1
                # Per-class number of predictions above the threshold and
                # per-class number of targets.
                count_pred = count(np_cls[np_scrs > threshes[k]])
                count_gt = count(clas)
                for c in range(len(id2cats)):
                    tps[k,c] += new_tp[c]
                    # Predictions not counted as true positives.
                    fps[k,c] += count_pred[c] - new_tp[c]
                    # Targets not counted as true positives.
                    fns[k,c] += count_gt[c] - new_tp[c]
        # One progress step per batch.
        prog.value += 1
    return tps, fps, fns

def mAP(md:ImageDataBunch):
    """Mean average precision over all classes in ``id2cats``.

    Per-class TP/FP/FN counts at every score threshold come from
    ``multiTPFPFN``. For each class, average precision is the mean over 11
    recall levels (0.0, 0.1, ..., 1.0) of the highest precision found among
    thresholds whose recall reaches that level (0.0 if none does). The
    result is the unweighted mean of the per-class values.
    """
    tps, fps, fns = multiTPFPFN(md)  # per-class tp, fp, fn at each threshold

    def _prec_recall(clas):
        # Precision and recall per threshold; both default to 1 where the
        # denominator is zero.
        tp, fp, fn = tps[:, clas], fps[:, clas], fns[:, clas]
        precision = np.where(tp + fp != 0, tp / (tp + fp), 1)
        recall = np.where(tp + fn != 0, tp / (tp + fn), 1)
        return precision, recall

    def plot_prec_recall(clas):
        # Plotting helper; not called by mAP itself.
        prec, recal = _prec_recall(clas)
        plt.plot(recal, prec)

    def avg_prec(clas):
        precisions, recalls = _prec_recall(clas)
        prec_at_rec = []
        for recall_level in np.linspace(0.0, 1.0, 11):
            reachable = precisions[recalls >= recall_level]
            # No threshold reaches this recall level -> precision 0.
            prec_at_rec.append(max(reachable) if reachable.size else 0.0)
        return np.array(prec_at_rec).mean()

    n_classes = len(id2cats)
    return sum(avg_prec(i) for i in range(n_classes)) / n_classes
def intersection(box_a, box_b):
    """Pairwise intersection area between two sets of boxes.

    Both inputs are 2-D tensors whose first two columns are the lower corner
    and last two columns the upper corner of each box.

    Returns:
        Tensor of shape ``(len(box_a), len(box_b))``; entry ``[i, j]`` is the
        intersection area of ``box_a[i]`` and ``box_b[j]`` (0 when disjoint).
    """
    a = box_a[:, None, :]
    b = box_b[None, :, :]
    lower = torch.max(a[:, :, :2], b[:, :, :2])
    upper = torch.min(a[:, :, 2:], b[:, :, 2:])
    # Negative extents mean no overlap along that axis.
    extent = torch.clamp(upper - lower, min=0)
    return extent[:, :, 0] * extent[:, :, 1]

def get_size(box):
    """Return the area of each row of ``box``: (col2 - col0) * (col3 - col1)."""
    side_a = box[:, 2] - box[:, 0]
    side_b = box[:, 3] - box[:, 1]
    return side_a * side_b

def jaccard(box_a, box_b):
    """Pairwise IoU (Jaccard index) between two sets of boxes.

    Returns a tensor with one row per box in ``box_a`` and one column per box
    in ``box_b``; each entry is intersection area divided by union area.
    """
    inter = intersection(box_a, box_b)
    area_a = get_size(box_a).unsqueeze(1)  # column vector, one entry per row
    area_b = get_size(box_b).unsqueeze(0)  # row vector, one entry per column
    union = area_a + area_b - inter
    return inter / union
def T(a, half=False, cuda=True):
    """Convert array-like data to a PyTorch tensor.

    Tensors are passed through unchanged. Anything else is turned into a
    NumPy array first: signed integer dtypes (int8/16/32/64) become a
    ``LongTensor``; float32/float64 become a ``FloatTensor`` (or go through
    ``to_half`` when ``half`` is true).

    Args:
        a: tensor or array-like input.
        half: use ``to_half`` for floating-point input.
        cuda: when true, the result is passed through ``to_gpu``.

    Raises:
        NotImplementedError: for any other dtype.
    """
    if not torch.is_tensor(a):
        arr = np.array(np.ascontiguousarray(a))
        dtype = arr.dtype
        if dtype in (np.int8, np.int16, np.int32, np.int64):
            a = torch.LongTensor(arr.astype(np.int64))
        elif dtype in (np.float32, np.float64):
            a = to_half(arr) if half else torch.FloatTensor(arr)
        else:
            raise NotImplementedError(dtype)
    return to_gpu(a) if cuda else a
def to_gpu(x, *args, **kwargs):
    """Move ``x`` to the GPU when CUDA is available, otherwise return it as is.

    Extra positional and keyword arguments are forwarded to ``x.cuda``.
    Checking availability keeps CPU-only machines from failing on ``.cuda()``.
    """
    if torch.cuda.is_available():
        return x.cuda(*args, **kwargs)
    return x