utils小连招

最新推荐文章于 2022-11-16 12:02:04 发布

微凉code

最新推荐文章于 2022-11-16 12:02:04 发布

阅读量332

点赞数

分类专栏： Pytorch 目标检测文章标签：目标检测深度学习

本文链接：https://blog.csdn.net/qq_41921315/article/details/124364672

版权

Pytorch 同时被 2 个专栏收录

15 篇文章 0 订阅

订阅专栏

目标检测

14 篇文章 0 订阅

订阅专栏

1.NMS

可见链接

import numpy as np
import cv2
from PIL import Image
# Demo boxes, one per row: x1, y1, x2, y2, confidence.
bboxes = np.array([[100, 100, 210, 210, 0.72],
                   [250, 250, 420, 420, 0.8],
                   [220, 220, 320, 330, 0.92],
                   [100, 100, 210, 210, 0.72],
                   [230, 240, 325, 330, 0.81],
                   [220, 230, 315, 340, 0.9]])

# Load any image; it only serves as a canvas for drawing the boxes.
img = cv2.imread('../img/1.png')
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: '../img/1.png'")

# Draw every candidate box in red (OpenCV colours are BGR).
for x1, y1, x2, y2 in bboxes[:, :4].astype(int):
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 3)

# OpenCV stores pixels as BGR while PIL expects RGB, so convert before showing;
# otherwise the red and blue channels are swapped on screen.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()
def nms(iou_thresh=0.5, conf_threash=0.5, boxes=None):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited in descending confidence order. The best remaining box
    is kept, every remaining box whose IoU with it exceeds ``iou_thresh`` is
    discarded, and the process repeats until no boxes remain or the best
    remaining confidence drops below ``conf_threash``.

    Args:
        iou_thresh: Boxes with IoU greater than this value against a kept box
            are suppressed.
        conf_threash: Boxes with confidence below this value are never kept.
        boxes: Optional array-like with rows ``[x1, y1, x2, y2, confidence]``.
            When omitted, the module-level ``bboxes`` array is used.

    Returns:
        Integer ``numpy`` array of the kept row indices (into the input
        array), ordered by descending confidence. Empty when nothing is kept.
    """
    data = np.asarray(bboxes if boxes is None else boxes)
    x1, y1, x2, y2, confidence = (data[:, col] for col in range(5))
    area = (x2 - x1) * (y2 - y1)

    keep = []
    # Only indices are shuffled around; the box array itself is left untouched.
    indices = confidence.argsort()[::-1]
    while indices.size > 0:
        idx_self, idx_other = indices[0], indices[1:]

        # Confidences are sorted, so once one falls below the threshold all
        # the remaining ones do as well.
        if confidence[idx_self] < conf_threash:
            break
        keep.append(idx_self)

        # Intersection of the current box with every remaining box.
        xx1 = np.maximum(x1[idx_self], x1[idx_other])
        yy1 = np.maximum(y1[idx_self], y1[idx_other])
        xx2 = np.minimum(x2[idx_self], x2[idx_other])
        yy2 = np.minimum(y2[idx_self], y2[idx_other])
        intersection = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)

        union = area[idx_self] + area[idx_other] - intersection
        iou = intersection / union

        # Positions (relative to idx_other) of the boxes that survive.
        keep_idx = np.where(iou <= iou_thresh)[0]
        # +1 because idx_other starts at indices[1].
        indices = indices[keep_idx + 1]

    # Force an index dtype so that even an empty result can be used to index.
    return np.array(keep, dtype=np.intp)

keep = nms()
print(keep, 2 in keep)
# Redraw the boxes that survived NMS in yellow (OpenCV colours are BGR).
for i in keep:
    x1, y1, x2, y2 = (int(v) for v in bboxes[i][:4])
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 255), 3)
# OpenCV stores pixels as BGR while PIL expects RGB, so convert before showing.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()

nms的框展示
在这里插入图片描述

使用torchvision库的nms

import numpy as np
import cv2
import torch
from PIL import Image
# Demo boxes, one per row: x1, y1, x2, y2, confidence.
bboxes = np.array([[100, 100, 210, 210, 0.72],
                   [250, 250, 420, 420, 0.8],
                   [220, 220, 320, 330, 0.92],
                   [100, 100, 210, 210, 0.72],
                   [230, 240, 325, 330, 0.81],
                   [220, 230, 315, 340, 0.9]])
# Use the NMS implementation that ships with torchvision.
from torchvision.ops import nms
img = cv2.imread('../img/1.png')
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: '../img/1.png'")
bbox = bboxes[:, :4]
conf = bboxes[:, -1]
# Drop low-confidence boxes first (keep conf > 0.5).
confnum = conf > 0.5
bbox = bbox[confnum]
conf = conf[confnum]
# The torchvision op works on tensors, not numpy arrays.
bbox = torch.from_numpy(bbox)
conf = torch.from_numpy(conf)
keep = nms(
    bbox,
    conf,
    0.5
)
print(bbox)
print(conf)
print(keep)
# `keep` holds indices into the confidence-filtered `bbox` tensor, so the
# boxes must be looked up there and not in the unfiltered `bboxes` array.
for i in keep.tolist():
    x1, y1, x2, y2 = (int(v) for v in bbox[i])
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 255), 3)
# OpenCV stores pixels as BGR while PIL expects RGB, so convert before showing.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()

nms后的框展示
在这里插入图片描述

2.get_map

计算map

相关知识链接
 睿智的目标检测20——利用mAP计算目标检测精确度
这个是用来绘制mAP曲线的。
https://github.com/Cartucho/mAP
这个是用来获取绘制mAP曲线所需的txt的
https://github.com/bubbliiiing/count-mAP-txt
因为要计算 TP 和 FP，所以需要对每个预测框逐一判断它是 TP 还是 FP。

首先我们要通过测试集得到一些文件
1.1 detection-results：指的是预测结果的txt。
1.2 ground-truth：指的是真实框的txt。
将同一类的预测结果放到一个json文件中，有n类就生成n个json文件
对每一类进行计算AP，计算TP，FP，recall和precision
计算mAP

2.1 得到文件

detection-results

        f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
        #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
        #---------------------------------------------------------#
        image       = cvtColor(image)
        #---------------------------------------------------------#
        #   给图像增加灰条，实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        image_data  = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测！
            #---------------------------------------------------------#
            outputs = self.net(images)
            outputs = self.bbox_util.decode_box(outputs)
            #---------------------------------------------------------#
            #   将预测框进行堆叠，然后进行非极大抑制
            #---------------------------------------------------------#
            results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, 
                        image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
                                                    
            if results[0] is None: 
                return 

            top_label   = np.array(results[0][:, 6], dtype = 'int32')
            top_conf    = results[0][:, 4] * results[0][:, 5]
            top_boxes   = results[0][:, :4]
		#c就是网络预测的label,将预测的值都写入detection-results/下面
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = str(top_conf[i])

            top, left, bottom, right = box
            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

        f.close()

此时我们得到了一些txt文件，里面就是网络预测的结果.
请添加图片描述
ground-truth
解析xml文件，将gt写入txt文件

map_mode=0
# Modes 0 and 2 both regenerate the ground-truth txt files from the VOC xml.
if map_mode in (0, 2):
    print("Get ground truth result.")
    for image_id in tqdm(image_ids):
        txt_path = os.path.join(map_out_path, "ground-truth/"+image_id+".txt")
        xml_path = os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")
        with open(txt_path, "w") as new_f:
            root = ET.parse(xml_path).getroot()
            for obj in root.findall('object'):
                # An object counts as difficult only when the tag exists and equals 1.
                difficult_node = obj.find('difficult')
                difficult_flag = difficult_node is not None and int(difficult_node.text) == 1

                obj_name = obj.find('name').text
                if obj_name not in class_names:
                    continue

                bndbox  = obj.find('bndbox')
                left, top, right, bottom = (
                    bndbox.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                )

                # Difficult objects carry a trailing "difficult" marker on their line.
                if difficult_flag:
                    line_format = "%s %s %s %s %s difficult\n"
                else:
                    line_format = "%s %s %s %s %s\n"
                new_f.write(line_format % (obj_name, left, top, right, bottom))
    print("Get ground truth result done.")

请添加图片描述

2.2 生成json

得到gt框的json

# Every input and output of the mAP computation lives under this directory.
path = 'map_out'
GT_PATH, DR_PATH, IMG_PATH, TEMP_FILES_PATH, RESULTS_FILES_PATH = (
    os.path.join(path, sub_dir)
    for sub_dir in ('ground-truth', 'detection-results', 'images-optional', '.temp_files', 'results')
)

# Scratch directory for the intermediate JSON files.
if not os.path.exists(TEMP_FILES_PATH):
    os.makedirs(TEMP_FILES_PATH)

# All ground-truth txt files; there must be at least one.
ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
if not ground_truth_files_list:
    error("Error: No ground-truth files found!")
ground_truth_files_list.sort()

# Number of (non-difficult) ground-truth boxes seen per class.
gt_counter_per_class     = {}

#记住这个函数，回头再看
def voc_ap(rec, prec):
    """Compute VOC-style average precision from a precision/recall curve.

    Mirrors the official VOC2012 MATLAB code::

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
                mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Args:
        rec: Recall values, one per detection, in detection order.
        prec: Precision values matching ``rec``.

    Returns:
        Tuple ``(ap, mrec, mpre)``: the average precision, the recall list
        padded with 0.0 / 1.0, and the padded precision list after it has
        been made monotonically non-increasing.

    The inputs are not modified; padded copies are built instead.
    """
    # Sentinels: recall runs from 0.0 to 1.0 and precision ends at 0.0.
    # The leading precision sentinel is 1.0 here (MATLAB uses 0); it never
    # enters the sum because the summation starts at index 1.
    mrec = [0.0, *rec, 1.0]
    mpre = [1.0, *prec, 0.0]

    # Make precision monotonically non-increasing, sweeping from the end
    # towards the beginning.
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # AP is the area under the envelope: at every index where recall changes,
    # add a rectangle of width (recall step) and height (envelope precision).
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += (mrec[i] - mrec[i - 1]) * mpre[i]
    return ap, mrec, mpre



"""
 Convert the lines of a file to a list
"""
def file_lines_to_list(path):
    """Return the lines of the text file at ``path`` as a list.

    Leading and trailing whitespace (including the newline) is stripped
    from every line.
    """
    with open(path) as handle:
        return [raw_line.strip() for raw_line in handle]


# 获取真实值gt,将其写入到json中
# Parse every ground-truth txt file and store its boxes as a JSON file.
for txt_file in ground_truth_files_list:
    file_id = txt_file.split(".txt", 1)[0]
    file_id = os.path.basename(os.path.normpath(file_id))

    # Every ground-truth file needs a matching detection-results file.
    temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
    if not os.path.exists(temp_path):
        error_msg = "Error. File not found: {}\n".format(temp_path)
        error(error_msg)

    # Lines of the ground-truth file (not predictions).
    lines_list = file_lines_to_list(txt_file)

    bounding_boxes = []
    for line in lines_list:
        if not line:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue

        # Line format: "<class name> left top right bottom [difficult]".
        # The class name may contain spaces, so fields are taken from the
        # right-hand side and whatever remains on the left is the name.
        line_split = line.split()
        is_difficult = "difficult" in line
        n_tail = 5 if is_difficult else 4
        left, top, right, bottom = line_split[-n_tail:][:4]
        class_name = " ".join(line_split[:-n_tail])

        bbox = left + " " + top + " " + right + " " + bottom
        if is_difficult:
            # Difficult boxes are stored but not counted as ground truth.
            bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True})
        else:
            bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
            # Count how many ground-truth boxes each class has.
            gt_counter_per_class[class_name] = gt_counter_per_class.get(class_name, 0) + 1

    with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)

请添加图片描述

# The test images must cover every class; otherwise n_classes comes out too small.
gt_classes = sorted(gt_counter_per_class)
n_classes = len(gt_classes)

得到每一类预测值的json

# Collect the network's detections and write one JSON file per class.
dr_files_list = glob.glob(DR_PATH + '/*.txt')
dr_files_list.sort()

# Read every detection file once and bucket its boxes by class name, instead
# of re-reading all files for each class.
detections_by_class = {}
for txt_file in dr_files_list:
    file_id = txt_file.split(".txt",1)[0]
    file_id = os.path.basename(os.path.normpath(file_id))

    # Every detection file needs a matching ground-truth file.
    temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
    if not os.path.exists(temp_path):
        error_msg = "Error. File not found: {}\n".format(temp_path)
        error(error_msg)

    for line in file_lines_to_list(txt_file):
        if not line:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue

        # Line format: "<class name> confidence left top right bottom".
        # The class name may contain spaces, so fields are taken from the
        # right-hand side and whatever remains on the left is the name.
        line_split = line.split()
        left, top, right, bottom = line_split[-4:]
        confidence = line_split[-5]
        tmp_class_name = " ".join(line_split[:-5])

        bbox = left + " " + top + " " + right + " " +bottom
        detections_by_class.setdefault(tmp_class_name, []).append(
            {"confidence":confidence, "file_id":file_id, "bbox":bbox})

for class_index, class_name in enumerate(gt_classes):
    # All detections of this class across every image, highest confidence first.
    bounding_boxes = detections_by_class.get(class_name, [])
    bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True)
    with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)

请添加图片描述

2.3 计算AP

sum_AP = 0.0
ap_dictionary = {}
lamr_dictionary = {}

# results.txt is written inside RESULTS_FILES_PATH; make sure the directory exists.
os.makedirs(RESULTS_FILES_PATH, exist_ok=True)
with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file:
    results_file.write("# AP and precision/recall per class\n")
    count_true_positives = {}

    # Compute AP class by class.
    for class_index, class_name in enumerate(gt_classes):
        count_true_positives[class_name] = 0
        dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
        with open(dr_file) as dr_handle:
            dr_data = json.load(dr_handle)

        # nd is the total number of detections of this class.
        nd = len(dr_data)
        # One tp/fp flag and one score per detection, all starting at 0.
        tp = [0] * nd
        fp = [0] * nd
        score = [0] * nd
        score05_idx = 0
        # Decide for every detection whether it is a TP or an FP:
        #   1. No ground-truth box of this class reaches the overlap
        #      threshold (MINOVERLAP)             -> FP.
        #   2. The best-matching ground-truth box was already claimed by
        #      another detection (a ground-truth box can match only one
        #      detection)                         -> FP, otherwise TP.
        #   A best match flagged "difficult" is counted as neither.
        for idx, detection in enumerate(dr_data):
            file_id = detection["file_id"]
            score[idx] = float(detection["confidence"])
            # Ends up as the index of the last detection scoring above 0.5.
            if score[idx] > 0.5:
                score05_idx = idx

            gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
            with open(gt_file) as gt_handle:
                ground_truth_data = json.load(gt_handle)
            ovmax = -1
            gt_match = -1
            # Box of the current detection: left, top, right, bottom.
            bb = [float(x) for x in detection["bbox"].split()]

            # Find the ground-truth box of this class with the largest IoU.
            for obj in ground_truth_data:
                if obj["class_name"] == class_name:
                    bbgt = [float(x) for x in obj["bbox"].split()]
                    bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
                    # Widths and heights use the inclusive-pixel (+1) convention.
                    iw = bi[2] - bi[0] + 1
                    ih = bi[3] - bi[1] + 1
                    if iw > 0 and ih > 0:
                        ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
                                                                          + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
                        # ov is the IoU of the two boxes.
                        ov = iw * ih / ua
                        if ov > ovmax:
                            ovmax = ov
                            gt_match = obj

            min_overlap = MINOVERLAP

            if ovmax >= min_overlap:
                if "difficult" not in gt_match:
                    if not bool(gt_match["used"]):
                        tp[idx] = 1
                        gt_match["used"] = True
                        count_true_positives[class_name] += 1
                        # Persist the "used" flag so later detections see the
                        # ground-truth box as already claimed.
                        with open(gt_file, 'w') as f:
                            f.write(json.dumps(ground_truth_data))
                    else:
                        fp[idx] = 1
            else:
                fp[idx] = 1

TP和FP计算完成，可以先调用库直接算AP

        epsilon = 1e-6  # keeps the denominators away from zero
        # Turn the per-detection tp/fp flags into tensors.
        tparray = torch.Tensor(tp)
        fparray = torch.Tensor(fp)

        # Running totals of true/false positives over the ranked detections.
        TP_cumsum = torch.cumsum(tparray, dim=0)
        FP_cumsum = torch.cumsum(fparray, dim=0)

        # Recall and precision after each detection.
        recalls = TP_cumsum / (gt_counter_per_class[class_name] + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))

        # The P-R curve starts at (recall=0, precision=1): prepend 1 to the
        # precisions and 0 to the recalls. Swapping the two sentinels would
        # make the first trapezoid run backwards along the recall axis.
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))

        # Integrate precision over recall with the trapezoidal rule: trapz(y, x).
        ap = torch.trapz(precisions, recalls)
        print(f'ave:{ap}')

下面手动计算AP

        # Turn the per-detection 0/1 flags into running totals, in place, e.g.
        # [0, 0, 1, 1, 1, 1] -> [0, 0, 1, 2, 3, 4]; entry i then holds the
        # number of FPs / TPs among detections 0..i.
        for flags in (fp, tp):
            running = 0
            for pos, flag in enumerate(flags):
                running += flag
                flags[pos] = running

        # Recall after each detection: TPs so far over the ground-truth count
        # (floored at 1 to avoid dividing by zero).
        gt_total = np.maximum(gt_counter_per_class[class_name], 1)
        rec = [float(hits) / gt_total for hits in tp]

        # Precision after each detection: TPs so far over detections so far.
        prec = [float(hits) / np.maximum(misses + hits, 1) for misses, hits in zip(fp, tp)]

        # voc_ap receives copies, so rec and prec stay usable afterwards.
        ap, mrec, mprec = voc_ap(rec[:], prec[:])
        print(f'ap:{ap}')

但实际上，用 torch.trapz 和用 voc_ap 算出来的结果并不一样。
原因在于两者的积分方式不同：trapz 直接对原始的 P-R 点做梯形积分；而 voc_ap 会先把 precision 处理成单调不增的包络线，再只在 recall 发生变化的位置按矩形累加面积。

import numpy as np
import matplotlib.pyplot as plt

def voc_ap(rec, prec):
    """Compute VOC-style average precision, printing each step.

    Mirrors the official VOC2012 MATLAB code::

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
                mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Args:
        rec: Recall values, one per detection, in detection order.
        prec: Precision values matching ``rec``.

    Returns:
        Tuple ``(ap, mrec, mpre)``: the average precision, the recall list
        padded with 0.0 / 1.0, and the padded precision list after it has
        been made monotonically non-increasing.

    The inputs are not modified; padded copies are built instead, so the
    caller can still use (e.g. plot) the original points afterwards.
    """
    # Sentinels: recall runs from 0.0 to 1.0 and precision ends at 0.0.
    # The leading precision sentinel is 1.0 here (MATLAB uses 0); it never
    # enters the sum because the summation starts at index 1.
    mrec = [0.0, *rec, 1.0]
    mpre = [1.0, *prec, 0.0]

    # Make precision monotonically non-increasing, sweeping from the end
    # towards the beginning.
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Indexes at which the recall value changes (MATLAB would use i + 1).
    i_list = []
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            i_list.append(i)
            print(mrec[i])

    # AP is the area under the envelope: for every recall change, add a
    # rectangle of width (recall step) and height (envelope precision).
    ap = 0.0
    for i in i_list:
        ap += (mrec[i] - mrec[i - 1]) * mpre[i]
        print(ap, mrec[i], mrec[i - 1], mpre[i])
    return ap, mrec, mpre

# Sample P-R points: x is recall (with repeated values), y is precision.
x = [0.1, 0.1, 0.2, 0.2, 0.3, 0.3]
y = [0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

# Plain trapezoidal integration of the raw points.
res = np.trapz(y, x)
print(res)

# VOC-style AP of the same points.
res2 = voc_ap(x, y)
print(res2)

plt.plot(x, y)
plt.show()

np.trapz 的结果为 0.07
voc_ap 的结果为 0.12
请添加图片描述

# Continuation of the per-class loop: compute F1 and report the metrics.
# NOTE(review): the indentation of this snippet is inconsistent as pasted (the
# F1 line sits at column 0, the rest is indented) -- align it with the
# enclosing loop before running.
# F1 = 2 * rec * prec / (prec + rec), element-wise; where prec + rec is 0 the
# denominator is replaced by 1 so the division is defined.
F1  = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec)))

            # Accumulate this class's AP into the running sum.
            sum_AP  += ap
            text    = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100)

            # Metrics are read at score05_idx; with no detections for the
            # class, fixed zero strings are used instead.
            if len(prec)>0:
                F1_text         = "{0:.2f}".format(F1[score05_idx]) + " = " + class_name + " F1 "
                Recall_text     = "{0:.2f}%".format(rec[score05_idx]*100) + " = " + class_name + " Recall "
                Precision_text  = "{0:.2f}%".format(prec[score05_idx]*100) + " = " + class_name + " Precision "
            else:
                F1_text         = "0.00" + " = " + class_name + " F1 " 
                Recall_text     = "0.00%" + " = " + class_name + " Recall " 
                Precision_text  = "0.00%" + " = " + class_name + " Precision " 

            # Write the full precision/recall lists, rounded to two decimals,
            # to the results file.
            rounded_prec    = [ '%.2f' % elem for elem in prec ]
            rounded_rec     = [ '%.2f' % elem for elem in rec ]
            results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")
            # Print a one-line summary for the class to the console.
            if len(prec)>0:
                print(text + "\t||\tscore_threhold=0.5 : " + "F1=" + "{0:.2f}".format(F1[score05_idx])\
                    + " ; Recall=" + "{0:.2f}%".format(rec[score05_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score05_idx]*100))
            else:
                print(text + "\t||\tscore_threhold=0.5 : F1=0.00% ; Recall=0.00% ; Precision=0.00%")
            # Remember the AP of this class by name.
            ap_dictionary[class_name] = ap