Yolov5中统计iou值的分布

小原今天吃什么

已于 2024-03-22 09:35:27 修改

阅读量257

点赞数 1

文章标签：深度学习 pytorch 人工智能

于 2023-11-13 16:55:39 首次发布

本文链接：https://blog.csdn.net/qq_46027463/article/details/134376141

版权

val.py中对应位置添加以下代码：

for循环前增添一行代码

    # IoU accumulator: insert this line right before the batch loop.
    # It starts empty so that no placeholder value is counted in the statistics, and it is
    # created on the evaluation device rather than a hard-coded 'cuda:0', so CPU-only
    # and non-default-GPU runs work as well.
    allcount = torch.zeros(0, device=device)
    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        t1 = time_sync()
        if pt or jit or engine:
            im = im.to(device, non_blocking=True)
            targets = targets.to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        nb, _, height, width = im.shape  # batch size, channels, height, width
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
        dt[1] += time_sync() - t2

接着，在循环内部添加两行代码

# Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
                # 添加以下两行代码
                oncecount = countsofiou(predn, labelsn, iouv).view(-1).to(iouv.device)
                allcount = torch.cat((allcount, oncecount), 0)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))  # (correct, conf, pcls, tcls)

然后在循环结束后添加以下代码

    # Compute the IoU frequency table. Keep this block aligned with the start of the for loop.
    # Local imports so the snippet works even if val.py does not import these at module level.
    import csv
    import matplotlib.pyplot as plt

    allcount = torch.round(allcount * 100) / 100
    unique_elements, counts = torch.unique(allcount, return_counts=True)
    # Convert to plain Python numbers once: csv.writer would otherwise write the
    # textual form of each 0-d tensor ("tensor(0.51, device=...)") into the file.
    iou_values = [round(v, 2) for v in unique_elements.tolist()]
    iou_counts = counts.tolist()
    # Save the unique IoU values and their frequencies to a CSV file.
    # newline='' is what the csv module expects; it avoids blank rows on Windows.
    with open(save_dir / 'unique_elements.csv', mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Unique Elements', 'Frequency'])
        writer.writerows(zip(iou_values, iou_counts))
    # Draw the histogram as a bar chart, one bar per rounded IoU value.
    fig, ax = plt.subplots()
    ax.bar(iou_values, iou_counts, width=0.01, edgecolor='black')
    ax.set_xlabel('IOU')
    ax.set_ylabel('Frequency')
    ax.set_title('Frequency of IOU')
    # Only IoU values in [0.5, 1] are shown.
    ax.set_xlim(0.5, 1)
    # Save through the Figure object and before showing it, so the file never ends up blank.
    fig.savefig(save_dir / 'histogram.png')
    plt.show()
    plt.close(fig)
    # Print the rounded IoU values and how often each one occurs.
    print("唯一元素:", unique_elements)
    print("出现次数:", counts)


    # 下方代码为原始自带代码，复制上方代码到指定位置即可
    # Compute metrics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

最后在val.py的全局区（函数之外）添加以下函数，并确认val.py顶部已导入 csv 和 matplotlib.pyplot（import csv、import matplotlib.pyplot as plt）：

def countsofiou(detections, labels, iouv):
    """
    Return the IoU of every matched (label, detection) pair, rounded to 2 decimals.

    Both sets of boxes are in (x1, y1, x2, y2) format. A pair is a candidate match when its
    IoU is at least iouv[0] and the classes agree; candidates are then de-duplicated so that
    each detection and each label is kept at most once.
    Arguments:
        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
        labels (Array[M, 5]), class, x1, y1, x2, y2
        iouv, IoU thresholds; only iouv[0] and iouv.device are used here
    Returns:
        iou (Tensor[K]) on iouv.device, one value per kept match; empty when nothing matches
    """
    iou = box_iou(labels[:, 1:], detections[:, :4])
    # Index pairs (label index, detection index) where IoU clears the lowest threshold and classes match.
    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))
    if not x[0].shape[0]:
        # Nothing matched: return an empty tensor instead of the raw pairwise IoU matrix,
        # which would otherwise pollute the statistics with unmatched values.
        return torch.zeros(0, dtype=iou.dtype, device=iouv.device)

    # Rows of [label index, detection index, iou].
    matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
    if x[0].shape[0] > 1:
        # Sort by IoU, descending (argsort is ascending, [::-1] reverses it).
        matches = matches[matches[:, 2].argsort()[::-1]]
        # Keep one row per detection: the first occurrence, i.e. its highest-IoU candidate.
        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
        # Keep one row per label. The rows are deliberately not re-sorted before this step,
        # which keeps the same order of operations as the original snippet.
        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]

    # This part also runs for a single match, which previously fell through and
    # returned the whole IoU matrix.
    matched_iou = torch.tensor(matches[:, 2]).to(iouv.device)
    return torch.round(matched_iou * 100) / 100

运行结果：

改进：不难发现，在val.py中添加了以上代码，检测结果得到的FPS值会下降，统计iou操作所耗费的时间被算入了推理时间内，如果进行正常的val操作就需要注释掉上述代码，较为麻烦，于是可以在val.py中添加一个bool值的超参数来给该功能做个开关。

在parse_opt()函数的超参数设置的最后一行加上

# Boolean switch: IoU counting is off unless --iou-count is passed on the command line.
# (With default=True and no action, the option could never be turned off from the CLI.)
parser.add_argument('--iou-count', action='store_true', help='use iou counts')

接着在run()函数形参列表的末尾加上 iou_count=False

def run(data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        workers=8,  # max dataloader workers (per RANK in DDP mode)
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a COCO-JSON results file
        project=ROOT / 'runs/val',  # save to project/name
        name='1',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        callbacks=Callbacks(),
        compute_loss=None,
        iou_count=False
        ):

接下来与前文类似，在对应的位置上加上if判断语句。

    # Initialise the IoU accumulator only when IoU counting is enabled.
    if iou_count:
        # Start empty so that no placeholder value is counted, and use the evaluation
        # device rather than a hard-coded 'cuda:0' so CPU-only runs work as well.
        allcount = torch.zeros(0, device=device)
    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        t1 = time_sync()
        if pt or jit or engine:
            im = im.to(device, non_blocking=True)
            targets = targets.to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        nb, _, height, width = im.shape  # batch size, channels, height, width
        t2 = time_sync()
        dt[0] += t2 - t1

# Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
                if iou_count:
                    oncecount = countsofiou(predn, labelsn, iouv).view(-1).to(iouv.device)
                    allcount = torch.cat((allcount, oncecount), 0)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))  # (correct, conf, pcls, tcls)

    if iou_count:
        # Local imports so the snippet works even if val.py does not import these at module level.
        import csv
        import matplotlib.pyplot as plt

        # Compute the IoU frequency table.
        allcount = torch.round(allcount * 100) / 100
        unique_elements, counts = torch.unique(allcount, return_counts=True)
        # Convert to plain Python numbers once: csv.writer would otherwise write the
        # textual form of each 0-d tensor ("tensor(0.51, device=...)") into the file.
        iou_values = [round(v, 2) for v in unique_elements.tolist()]
        iou_counts = counts.tolist()
        # Save the unique IoU values and their frequencies to a CSV file.
        # newline='' is what the csv module expects; it avoids blank rows on Windows.
        with open(save_dir / 'unique_elements.csv', mode='w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(['Unique Elements', 'Frequency'])
            writer.writerows(zip(iou_values, iou_counts))
        # Draw the histogram as a bar chart, one bar per rounded IoU value.
        fig, ax = plt.subplots()
        ax.bar(iou_values, iou_counts, width=0.01, edgecolor='black')
        ax.set_xlabel('IOU')
        ax.set_ylabel('Frequency')
        ax.set_title('Frequency of IOU')
        # Only IoU values in [0.5, 1] are shown.
        ax.set_xlim(0.5, 1)
        # Save through the Figure object and before showing it, so the file never ends up blank.
        fig.savefig(save_dir / 'histogram.png')
        plt.show()
        plt.close(fig)
        # Print the rounded IoU values and how often each one occurs.
        print("唯一元素:", unique_elements)
        print("出现次数:", counts)

以上代码保存了csv文件，绘制了柱状图并展示了柱状图，同时打印了iou统计数据。读者可以根据自己的需求删掉多余的代码，例如删掉plt.show()，将打印删掉等。

此外，还可以把真实框与预测框画在同一张图上进行对比：在utils/plots.py中添加以下函数

def _draw_image_boxes(annotator, rows, x, y, w, h, scale, names, color=None):
    """Draw the boxes of one image (label rows or prediction rows) into its mosaic cell.

    rows holds [batch index, class, x, y, w, h] plus an optional confidence column; rows with
    exactly 6 columns are treated as labels. (x, y) is the cell origin and (w, h) the cell size.
    When color is None each box uses its class colour, otherwise the given fixed colour.
    """
    boxes = xywh2xyxy(rows[:, 2:6]).T
    classes = rows[:, 1].astype('int')
    is_label = rows.shape[1] == 6  # labels carry no confidence column
    conf = None if is_label else rows[:, 6]

    if boxes.shape[1]:
        if boxes.max() <= 1.01:  # normalized coordinates (tolerance 0.01): scale to pixels
            boxes[[0, 2]] *= w
            boxes[[1, 3]] *= h
        elif scale < 1:  # absolute coordinates must follow the mosaic down-scaling
            boxes *= scale
    # Shift from image coordinates to the position of this cell inside the mosaic.
    boxes[[0, 2]] += x
    boxes[[1, 3]] += y

    for j, box in enumerate(boxes.T.tolist()):
        cls = classes[j]  # boxes and classes share the same row order
        box_color = colors(cls) if color is None else color
        cls = names[cls] if names else cls
        if is_label or conf[j] > 0.25:  # 0.25 conf thresh
            label = f'{cls}' if is_label else f'{cls} {conf[j]:.1f}'
            annotator.box_label(box, label, color=box_color)


def plot_image(images, targets, pre, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16):
    """Plot an image grid showing both the label boxes and the predicted boxes, then save it.

    Arguments:
        images: batch of images (tensor or array), indexed as (batch, channels, height, width)
        targets: label rows [batch index, class, x, y, w, h]
        pre: prediction rows in the same layout, with an extra confidence column
        paths: optional image paths; the file name is written under each cell
        fname: output file
        names: optional class-index -> class-name lookup
        max_size: maximum mosaic side length in pixels before it is scaled down
        max_subplots: maximum number of images placed in the grid
    Label boxes use the per-class colour; prediction boxes are drawn with the fixed colour (0, 0, 255).
    """
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()
    if isinstance(pre, torch.Tensor):
        pre = pre.cpu().numpy()
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # more images in the batch than cells in the grid
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(side * ns) for side in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    # Iterate over exactly the cells that received an image. Reusing the previous loop variable
    # would annotate one extra, empty cell whenever the batch is larger than max_subplots.
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # cell border
        if paths:
            annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        # Labels and predictions are guarded independently: an empty array of either kind
        # must neither hide the other kind nor fail on the column indexing.
        if len(targets) > 0:
            _draw_image_boxes(annotator, targets[targets[:, 0] == i], x, y, w, h, scale, names)
        if len(pre) > 0:
            _draw_image_boxes(annotator, pre[pre[:, 0] == i], x, y, w, h, scale, names, color=(0, 0, 255))
    annotator.im.save(fname)  # save

再在val.py中导入库，和添加对应代码

from utils.plots import output_to_target, plot_images, plot_val_study, plot_image

        # Plot images Thread将会创建并启动三个新的线程，分别执行两次 plot_images 和一次 plot_image 函数，以提高程序的执行效率和响应速度
        if plots and batch_i < 3:
            f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
            f = save_dir / f'val_batch{batch_i}_labels&pred.jpg'  # predictions
            Thread(target=plot_image, args=(im, targets, output_to_target(out), paths, f, names), daemon=True).start()

输出效果如下：val的前三个batch的label框与预测框同时展现。

接下来对比两种算法（baseline与改进后的模型）各自统计得到的iou分布，即分别读取两次val生成的unique_elements.csv并叠加绘制，以比较两种算法在边界框回归上的准确性

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
# Paths to the two IoU frequency tables written by val.py: baseline first, improved model second.
# Replace both placeholders with the real paths of the two runs.
filename1 = '.../unique_elements.csv'
filename2 = '.../unique_elements.csv'
# Load each CSV with pandas and take the underlying NumPy array:
# column 0 is the IoU value, column 1 its frequency.
baseline = pd.read_csv(filename1).values
improve = pd.read_csv(filename2).values
# Create the figure.
fig, ax = plt.subplots()
# Overlay the two distributions as semi-transparent bars; the labels feed the legend
# so the two series can be told apart.
ax.bar(baseline[:, 0], baseline[:, 1], width=0.01, edgecolor='black', alpha=0.5, label='baseline')
ax.bar(improve[:, 0], improve[:, 1], width=0.01, edgecolor='black', color='red', alpha=0.5, label='improved')
# Axis labels and title.
ax.set_xlabel('IOU')
ax.set_ylabel('Frequency')
ax.set_title('Frequency of IOU')
# Visible ranges; adjust the y limit to the frequencies in your own data.
ax.set_xlim(0.5, 1)
ax.set_ylim(0, 25)
ax.legend()
# Save through the Figure object and before showing it, so the file never ends up blank.
fig.savefig('bar.png')
plt.show()