(1) IoU computation
import torch

def iou_xywh_torch(boxes1, boxes2):
    """
    :param boxes1: boxes1 and boxes2 may have different shapes, but they must be broadcastable, and both must be Tensors.
    :param boxes2: The last dimension must be the coordinate dimension, stored as (x, y, w, h).
    :return: the IoU of boxes1 and boxes2; its shape is the broadcast shape[:-1] of boxes1 and boxes2.
    """
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]
    # Convert both boxes to corner form (xmin, ymin, xmax, ymax), where (xmin, ymin)
    # is the top-left corner and (xmax, ymax) is the bottom-right corner.
    boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1)
    boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1)
    # Top-left and bottom-right corners of the intersection of boxes1 and boxes2.
    left_up = torch.max(boxes1[..., :2], boxes2[..., :2])
    right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:])
    # When two boxes do not overlap, (right_down - left_up) < 0, so clamping at
    # zero guarantees an IoU of 0 for non-overlapping boxes.
    inter_section = torch.max(right_down - left_up, torch.zeros_like(right_down))
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area
    return IOU
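A quick sanity check of the broadcasting behavior (the tensor values below are made up for illustration): a single (1, 4) box can be compared against a batch of boxes in one call.
a = torch.tensor([[50., 50., 20., 20.]])   # shape (1, 4), (x, y, w, h)
b = torch.tensor([[50., 50., 20., 20.],
                  [50., 50., 40., 40.],
                  [90., 90., 20., 20.]])   # shape (3, 4)
print(iou_xywh_torch(a, b))                # tensor([1.0000, 0.2500, 0.0000])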
(2) GIoU computation
def GIOU_xywh_torch(boxes1, boxes2):
    """
    GIoU, following https://arxiv.org/abs/1902.09630
    boxes1 (and boxes2) have shape [..., (x, y, w, h)], in original-image scale.
    """
    # xywh -> xyxy: convert center/width/height into top-left and bottom-right corners.
    boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1)
    boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1)
    # Re-order the corners to guard against degenerate boxes (e.g. negative w or h).
    boxes1 = torch.cat([torch.min(boxes1[..., :2], boxes1[..., 2:]),
                        torch.max(boxes1[..., :2], boxes1[..., 2:])], dim=-1)
    boxes2 = torch.cat([torch.min(boxes2[..., :2], boxes2[..., 2:]),
                        torch.max(boxes2[..., :2], boxes2[..., 2:])], dim=-1)
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
    # Top-left and bottom-right corners of the intersection.
    inter_left_up = torch.max(boxes1[..., :2], boxes2[..., :2])
    inter_right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:])
    # Clamp at zero so the intersection is 0 when the boxes do not overlap.
    inter_section = torch.max(inter_right_down - inter_left_up, torch.zeros_like(inter_right_down))
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area
    # Top-left and bottom-right corners of the smallest box enclosing both boxes1 and boxes2.
    enclose_left_up = torch.min(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = torch.max(boxes1[..., 2:], boxes2[..., 2:])
    enclose_section = torch.max(enclose_right_down - enclose_left_up, torch.zeros_like(enclose_right_down))
    enclose_area = enclose_section[..., 0] * enclose_section[..., 1]
    GIOU = IOU - 1.0 * (enclose_area - union_area) / enclose_area
    return GIOU
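Unlike IoU, GIoU can be negative when two boxes are far apart, which is what makes 1 - GIoU usable as a regression loss even for non-overlapping boxes. A minimal sketch with made-up values:
a = torch.tensor([[50., 50., 20., 20.]])
b = torch.tensor([[90., 90., 20., 20.]])  # no overlap with a
giou = GIOU_xywh_torch(a, b)   # ~ -0.78: IoU is 0, minus the empty fraction of the enclosing box
giou_loss = 1.0 - giou         # the GIoU loss from the paper, ~ 1.78 here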
(3) NMS: NMS only suppresses boxes within the same class; boxes of different classes (inter-class) never suppress each other. So when a large object contains a small one, first check whether the two detections share a class: if they do, the inner box may get suppressed, which can be alleviated by raising the IoU threshold or by using soft-NMS; if they belong to different classes, neither is suppressed.
import numpy as np

def nms(bboxes, score_threshold, iou_threshold, sigma=0.3, method='nms'):
    """
    :param bboxes:
        Assuming N bboxes have a score above score_threshold, bboxes has shape (N, 6),
        stored as (xmin, ymin, xmax, ymax, score, class), where (xmin, ymin, xmax, ymax)
        are in original-image coordinates, score = conf * prob, and class is the index
        of the bbox's category.
    :return: best_bboxes
        Assuming N bboxes survive NMS, best_bboxes has shape (N, 6) with the same
        storage format as above.
    """
    # Collect the set of classes present in the image; set() removes duplicates.
    classes_in_img = list(set(bboxes[:, 5].astype(np.int32)))
    best_bboxes = []
    for cls in classes_in_img:  # run NMS separately for each class
        cls_mask = (bboxes[:, 5].astype(np.int32) == cls)
        cls_bboxes = bboxes[cls_mask]  # pick out the predicted boxes of the current class
        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])  # index of the highest-scoring bbox
            best_bbox = cls_bboxes[max_ind]        # pick the best box
            best_bboxes.append(best_bbox)          # keep it
            # Remove the best bbox, keeping all the remaining bboxes.
            cls_bboxes = np.concatenate([cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])
            # IoU between the best bbox and every remaining bbox
            # (iou_xyxy_numpy is sketched below this function).
            iou = iou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
            assert method in ['nms', 'soft-nms']
            weight = np.ones((len(iou),), dtype=np.float32)
            if method == 'nms':
                iou_mask = iou > iou_threshold  # boxes overlapping the best box too much
                weight[iou_mask] = 0.0          # hard NMS: zero out their weights
            if method == 'soft-nms':
                # Soft NMS: decay the weight with a Gaussian of the IoU.
                weight = np.exp(-(1.0 * iou ** 2 / sigma))
            # Rescale the scores; under hard NMS the suppressed boxes get score 0.
            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            # Keep only the boxes whose (rescaled) score is still above the threshold.
            score_mask = cls_bboxes[:, 4] > score_threshold
            cls_bboxes = cls_bboxes[score_mask]
    return np.array(best_bboxes)
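nms() relies on iou_xyxy_numpy, which is not shown above; a minimal sketch under the same conventions (corner-format boxes, broadcastable NumPy arrays) could look like this:
def iou_xyxy_numpy(boxes1, boxes2):
    # Sketch of the missing helper: IoU for (xmin, ymin, xmax, ymax) boxes.
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    return inter_area / (boxes1_area + boxes2_area - inter_area)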
(4) mAP computation
References: mAP computation, mAP computation methods, and the COCO object detection evaluation metrics.
def voc_ap(self, rec, prec, use_07_metric=True):  # rec = recall, prec = precision
    if use_07_metric:
        ap = 0.
        # Before 2010: average the precision at 11 equally spaced recall points
        # (0., 0.1, 0.2, ..., 0.9, 1.0).
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                # Taking the max here is equivalent to the post-2010 envelope
                # computation: it keeps the interpolated precision monotone.
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # After 2010: average the precision over all distinct recall values.
        # First append sentinel values at both ends.
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        # Compute the envelope: scan backwards taking the running max, so the
        # precision curve becomes monotone.
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        # Find the indices where the recall value changes.
        i = np.where(mrec[1:] != mrec[:-1])[0]
        # Sum (delta recall) * precision: a precision average weighted by the
        # recall intervals.
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
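voc_ap is written as a method (its first parameter is self), so this toy check simply passes None for it; the recall/precision values below are made up:
rec = np.array([0.25, 0.5, 0.75, 1.0])
prec = np.array([1.0, 0.5, 0.66, 0.5])
# The 11-point metric and the post-2010 area-style metric give slightly different APs.
print(voc_ap(None, rec, prec, use_07_metric=True))
print(voc_ap(None, rec, prec, use_07_metric=False))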
# AP for a single class; mAP is the mean of the APs over all classes.
def voc_eval(self, detpath,
             classname,
             ovthresh=0.5,
             use_07_metric=True):
    # Collect all ground truths of the current class over all test images.
    # (`imagenames` and `recs` come from parsing the annotation files; that code
    # is omitted in this excerpt.)
    class_recs = {}
    npos = 0
    # Iterate over all test images.
    for imagename in imagenames:
        # Objects of the current class in this image.
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        # All bboxes of the current class in this image.
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        # Flags marking whether each ground-truth bbox has already been matched.
        det = [False] * len(R)
        # Total number of objects of this class over all images, excluding difficult ones.
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}
    # Read the detection-result file for this class; each line is one detection.
    # (`lines` / `splitlines` come from reading detpath; omitted in this excerpt.)
    if any(lines) == 1:
        # Image name the detection on a given line belongs to.
        image_ids = [x[0] for x in splitlines]
        # Confidence of each detection.
        confidence = np.array([float(x[1]) for x in splitlines])
        # Bbox of each detection.
        BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
        # Sort this class's detections by confidence in descending order.
        sorted_ind = np.argsort(-confidence)
        sorted_scores = np.sort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]
        # Total number of detections for this class (all detected bboxes).
        nd = len(image_ids)
        # Mark each detection as tp or fp.
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        # Walk through the detections in order of confidence.
        for d in range(nd):
            # All ground truths in the image this detection belongs to.
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            # Maximum overlap with any ground truth in this image.
            if BBGT.size > 0:
                # ...... (the intersection `inters` / union `uni` computation is omitted in the original)
                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)
            # If the maximum overlap exceeds the threshold:
            if ovmax > ovthresh:
                # Ignore the detection if the best-matching ground truth is difficult.
                if not R['difficult'][jmax]:
                    # If that ground truth has not been matched before, this is a tp.
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    # If a higher-confidence detection already matched it, this is a fp.
                    else:
                        fp[d] = 1.
            # No ground truth of this class in the image, or all overlaps below threshold: fp.
            else:
                fp[d] = 1.
        # Cumulative fp and tp as more detections are taken (by descending confidence).
        # np.cumsum([1, 2, 3, 4]) -> [1, 3, 6, 10]
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        # Recall: fraction of all real objects found; non-decreasing. Note that npos
        # already excludes difficult objects, so npos = tp + fn.
        rec = tp / float(npos)
        # Precision: fraction of tp among the detections taken so far.
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        # Area under the recall-precision curve (strictly speaking, not exactly the area).
        ap = self.voc_ap(rec, prec, use_07_metric)
    # If there are no detections for this class, return -1 for everything.
    else:
        rec = -1.
        prec = -1.
        ap = -1.
    return rec, prec, ap
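To see how the cumulative sums turn per-detection tp/fp flags into a PR curve, here is a toy run with made-up flags and npos assumed to be 5:
tp = np.cumsum([1., 0., 1., 1.])   # -> [1., 1., 2., 3.]
fp = np.cumsum([0., 1., 0., 0.])   # -> [0., 1., 1., 1.]
npos = 5
rec = tp / float(npos)                                      # [0.2, 0.2, 0.4, 0.6]
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)   # [1.0, 0.5, 0.667, 0.75]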
(5) Pairwise Euclidean distances between the points of two matrices
When using a k-NN model, we need the Euclidean distance from every point in the test set to every point in the training set, i.e. the pairwise distances between the row vectors of two matrices.
# X and Y are the matrices in question; each row is one point.
X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
Y = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
Using two loops
def cal_dist(X, Y):
    num_x = X.shape[0]
    num_y = Y.shape[0]
    # For the X and Y above, the result is (3, 4): row i holds the 4 distances
    # from X[i] to the 4 row vectors of Y.
    dist = np.zeros((num_x, num_y))
    for i in range(num_x):
        for j in range(num_y):
            dist[i][j] = np.sqrt(np.sum(np.square(X[i] - Y[j])))
    return dist
Using one loop
def cal_dist(X, Y):
    num_test = X.shape[0]
    num_train = Y.shape[0]
    dist = np.zeros((num_test, num_train))
    for i in range(num_test):
        # Broadcasting expands X[i] to the same shape as Y.
        dist[i, :] = np.sqrt(np.sum(np.square(Y - X[i]), axis=1))
    return dist
Using no loops
This is the most efficient version: represent X and Y as matrices and replace the loops with matrix operations. It relies on the identity ||x - y||^2 = ||x||^2 - 2 x·y + ||y||^2, applied to every pair of rows at once, so it requires some familiarity with matrix algebra.
def cal_dist(X, Y):
    # -2 * X @ Y.T gives the cross terms; the two squared-norm sums broadcast
    # to shape (num_test, num_train). np.dot() is matrix multiplication.
    dist = np.sqrt(-2 * np.dot(X, Y.T)
                   + np.sum(np.square(Y), axis=1)
                   + np.transpose([np.sum(np.square(X), axis=1)]))
    return dist
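All three versions compute the same matrix; a quick check against a direct broadcasting computation (SciPy users can also compare against scipy.spatial.distance.cdist):
dist = cal_dist(X, Y)
# Reference: expand to shape (3, 4, 3) and reduce over the coordinate axis.
ref = np.sqrt(((X[:, None, :] - Y[None, :, :]) ** 2).sum(axis=2))
print(np.allclose(dist, ref))  # True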
(6) K-means clustering code for anchor boxes
import xml.etree.ElementTree as ET
import numpy as np
# from pycocotools.coco import COCO
from tqdm import tqdm
import glob
import os
def iou(box, clusters):
    """
    Compute the IoU between one ground-truth box and the k prior (anchor) boxes.
    Only widths and heights are compared, i.e. all boxes are treated as if they
    shared the same top-left corner.
    :param box: tuple or array holding the ground truth's width and height.
    :param clusters: numpy array of shape (k, 2), where k is the number of anchor clusters.
    :return: the IoU between the ground truth and each anchor box.
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")
    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]
    iou_ = intersection / (box_area + cluster_area - intersection)
    return iou_
def avg_iou(boxes, clusters):
    """
    Mean, over all ground truths, of the best IoU with any of the k anchors.
    """
    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])
def Iou_Kmeans(boxes, k, dist=np.median):
    """
    K-means clustering using the IoU metric.
    :param boxes: ground-truth boxes of shape (r, 2), where r is the number of ground truths.
    :param k: number of anchors.
    :param dist: the statistic used to update the cluster centers (np.median by default).
    :return: the k anchor boxes, shape (k, 2).
    """
    # The r mentioned in the docstring above.
    rows = boxes.shape[0]
    # Distance from each ground truth to each of the k anchors.
    distances = np.empty((rows, k))
    # Index of the nearest anchor for each ground truth in the previous iteration.
    last_clusters = np.zeros((rows,))
    # Seed the random number generator (from OS entropy when called without an argument).
    np.random.seed()
    # Initialize the cluster centers: pick k of the r ground truths at random.
    clusters = boxes[np.random.choice(rows, k, replace=False)]
    # Iterate until the assignments stop changing.
    while True:
        # Distance of each ground truth to the k anchors, using 1 - IOU(box, anchor).
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)
        # Assign each ground truth to its nearest anchor and record the index.
        nearest_clusters = np.argmin(distances, axis=1)
        # Stop once no assignment changed since the last iteration.
        if (last_clusters == nearest_clusters).all():
            break
        # Update each cluster center to the median (by default) of its member boxes.
        # (If a cluster ever ends up empty, np.median over an empty slice yields nan;
        # with enough boxes this rarely happens.)
        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)
        # Remember the current assignments.
        last_clusters = nearest_clusters
    return clusters
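A quick smoke test on made-up box sizes (normalized widths and heights, chosen to stay clear of the zero-area check in iou) before pointing the code at a real dataset:
boxes = np.random.uniform(0.05, 1.0, size=(200, 2))  # hypothetical (w, h) pairs
anchors = Iou_Kmeans(boxes, k=3)
print(anchors)
print(avg_iou(boxes, anchors))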
def id2name(coco):
    classes = dict()
    classes_id = []
    for cls in coco.dataset['categories']:
        classes[cls['id']] = cls['name']
    for key in classes.keys():
        classes_id.append(key)
    return classes, classes_id
def load_dataset(path, types='voc'):
    dataset = []
    if types == 'voc':
        # for xml_file in glob.glob("{}/*xml".format(path)):
        num = 9963  # number of VOC2007 annotation files, named 000001.xml ... 009963.xml
        for i in range(num):
            xml_file = os.path.join(path, "{:06d}.xml".format(i + 1))
            tree = ET.parse(xml_file)
            # Image height.
            height = int(tree.findtext("./size/height"))
            # Image width.
            width = int(tree.findtext("./size/width"))
            for obj in tree.iter("object"):
                # Normalize the coordinates by the image size.
                xmin = int(obj.findtext("bndbox/xmin")) / width
                ymin = int(obj.findtext("bndbox/ymin")) / height
                xmax = int(obj.findtext("bndbox/xmax")) / width
                ymax = int(obj.findtext("bndbox/ymax")) / height
                xmin = np.float64(xmin)
                ymin = np.float64(ymin)
                xmax = np.float64(xmax)
                ymax = np.float64(ymax)
                if xmax == xmin or ymax == ymin:
                    print(xml_file)
                # Store the (normalized) width and height; kmeans runs on these.
                dataset.append([xmax - xmin, ymax - ymin])
    # if types == 'coco':
    #     coco = COCO(path)
    #     classes, classes_id = id2name(coco)
    #     print(classes)
    #     print('class_ids:', classes_id)
    #
    #     img_ids = coco.getImgIds()
    #     print(len(img_ids))
    #
    #     for imgId in img_ids:
    #         i = 0
    #         img = coco.loadImgs(imgId)[i]
    #         height = img['height']
    #         width = img['width']
    #         i = i + 1
    #         if imgId % 500 == 0:
    #             print('process {} images'.format(imgId))
    #         annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
    #         anns = coco.loadAnns(annIds)
    #         # time.sleep(0.2)
    #         for ann in anns:
    #             if 'bbox' in ann:
    #                 # COCO annotation format: [x, y, width, height]
    #                 bbox = ann['bbox']
    #                 ann_width = bbox[2]
    #                 ann_height = bbox[3]
    #                 # Normalize by the image size.
    #                 ann_width = np.float64(ann_width / width)
    #                 ann_height = np.float64(ann_height / height)
    #                 dataset.append([ann_width, ann_height])
    #             else:
    #                 raise ValueError("coco no bbox -- wrong!!!")
    return np.array(dataset)
if __name__ == '__main__':
    annFile = r'G:\github_files\datasets\VOC2007\Annotations'  # raw string for the Windows path
    clusters = 5
    Inputdim = 800  # input image size
    data = load_dataset(path=annFile, types='voc')
    out = Iou_Kmeans(data, k=clusters)
    # Scale the normalized (w, h) clusters back to the input resolution.
    anchor = np.array(out) * Inputdim
    print("Boxes: {} ".format(anchor))
    print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
    # Width/height aspect ratios of the anchors, rounded to two decimals.
    final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
    print("Before Sort Ratios:\n {}".format(final_anchors))
    print("After Sort Ratios:\n {}".format(sorted(final_anchors)))