1.NMS
可见链接
import numpy as np
import cv2
from PIL import Image
# Demo detections, one row per box: [x1, y1, x2, y2, confidence].
bboxes = np.array([
    [100, 100, 210, 210, 0.72],
    [250, 250, 420, 420, 0.8],
    [220, 220, 320, 330, 0.92],
    [100, 100, 210, 210, 0.72],
    [230, 240, 325, 330, 0.81],
    [220, 230, 315, 340, 0.9],
])

# Load an arbitrary picture to serve as the canvas for the boxes.
img = cv2.imread('../img/1.png')
if img is None:
    # cv2.imread reports an unreadable path by returning None, not by raising.
    raise FileNotFoundError("cannot read image '../img/1.png'")

# Draw every candidate box; (0, 0, 255) is red in OpenCV's BGR channel order.
for x1, y1, x2, y2 in bboxes[:, :4]:
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 3)

# PIL interprets arrays as RGB, so convert from BGR before displaying;
# otherwise the red boxes would be shown in blue.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()
def nms(iou_thresh=0.5, conf_threash=0.5, boxes=None):
    """Run greedy non-maximum suppression and return the kept row indices.

    The highest-confidence box is kept, every remaining box whose IoU with
    it exceeds ``iou_thresh`` is discarded, and the process repeats on what
    is left. The boxes themselves are never moved; only indices into the
    original array are tracked.

    Args:
        iou_thresh: Boxes overlapping a kept box by more than this IoU are
            suppressed.
        conf_threash: Boxes with confidence below this value are never kept.
        boxes: Array-like of rows ``[x1, y1, x2, y2, confidence]``. When
            omitted, the module-level ``bboxes`` array is used, which is
            what the original zero-argument call relied on.

    Returns:
        Integer ``numpy`` array of kept row indices, ordered by descending
        confidence. Empty (but still integer-typed) when nothing is kept.
    """
    if boxes is None:
        boxes = bboxes
    boxes = np.asarray(boxes, dtype=float).reshape(-1, 5)

    x1, y1, x2, y2, confidence = boxes.T
    area = (x2 - x1) * (y2 - y1)
    keep = []
    # Candidate indices, best confidence first.
    indices = confidence.argsort()[::-1]
    while indices.size > 0:
        idx_self, idx_other = indices[0], indices[1:]
        # Candidates are sorted, so once one falls below the confidence
        # threshold all the remaining ones do too.
        if confidence[idx_self] < conf_threash:
            break
        keep.append(idx_self)

        # Intersection rectangle between the kept box and each other box.
        xx1 = np.maximum(x1[idx_self], x1[idx_other])
        yy1 = np.maximum(y1[idx_self], y1[idx_other])
        xx2 = np.minimum(x2[idx_self], x2[idx_other])
        yy2 = np.minimum(y2[idx_self], y2[idx_other])
        w = np.maximum(0, xx2 - xx1)
        h = np.maximum(0, yy2 - yy1)
        intersection = w * h
        union = area[idx_self] + area[idx_other] - intersection
        iou = intersection / union

        # Only boxes that do not overlap the kept box too much survive.
        indices = idx_other[iou <= iou_thresh]
    # Force an integer dtype so that an empty result can still be used as
    # an index array (np.array([]) would default to float64).
    return np.array(keep, dtype=np.intp)
keep = nms()
print(keep, 2 in keep)
# Highlight the boxes that survived NMS; (0, 255, 255) is yellow in BGR.
for i in keep:
    cv2.rectangle(
        img,
        (int(bboxes[i][0]), int(bboxes[i][1])),
        (int(bboxes[i][2]), int(bboxes[i][3])),
        (0, 255, 255),
        3,
    )
# OpenCV images are BGR while PIL expects RGB: convert before showing so
# the colours on screen match the ones drawn above.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()
nms的框展示
使用torchvision库的nms
import numpy as np
import cv2
import torch
from PIL import Image
# Demo detections, one row per box: [x1, y1, x2, y2, confidence].
bboxes = np.array([
    [100, 100, 210, 210, 0.72],
    [250, 250, 420, 420, 0.8],
    [220, 220, 320, 330, 0.92],
    [100, 100, 210, 210, 0.72],
    [230, 240, 325, 330, 0.81],
    [220, 230, 315, 340, 0.9],
])

# Use the NMS implementation that ships with torchvision.
from torchvision.ops import nms

img = cv2.imread('../img/1.png')
if img is None:
    # cv2.imread reports an unreadable path by returning None, not by raising.
    raise FileNotFoundError("cannot read image '../img/1.png'")

bbox = bboxes[:, :4]
conf = bboxes[:, -1]
# Drop boxes whose confidence is not above 0.5 before running NMS.
confnum = conf > 0.5
bbox = bbox[confnum]
conf = conf[confnum]
# The torchvision op works on tensors, not numpy arrays.
bbox = torch.from_numpy(bbox)
conf = torch.from_numpy(conf)
keep = nms(bbox, conf, 0.5)

print(bbox)
print(conf)
print(keep)
# Draw the surviving boxes. `keep` indexes the *filtered* tensor `bbox`,
# so the coordinates must be read from `bbox`; indexing the unfiltered
# `bboxes` array would pick the wrong rows whenever the confidence filter
# removed anything.
for i in keep.tolist():
    x1, y1, x2, y2 = (int(v) for v in bbox[i])
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 255), 3)

# OpenCV images are BGR while PIL expects RGB: convert before showing.
img1 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
img1.show()
nms后的框展示
2.get_map
计算map
相关知识链接
睿智的目标检测20——利用mAP计算目标检测精确度
这个是用来绘制mAP曲线的。
https://github.com/Cartucho/mAP
这个是用来获取绘制mAP曲线所需的txt的
https://github.com/bubbliiiing/count-mAP-txt
因为我们要计算TP和FP,所以要对每个预测框进行判断,他是TP还是FP
1. 首先通过测试集得到两类文件:
   1.1 detection-results:预测结果的txt。
   1.2 ground-truth:真实框的txt。
2. 将同一类的预测结果放到一个json文件中,有n类就生成n个json文件。
3. 对每一类计算TP、FP、recall和precision,进而计算该类的AP。
4. 对所有类别的AP取平均,得到mAP。
2.1 得到文件
detection-results
# Fragment of a method body that writes one image's detections to
# detection-results/<image_id>.txt. The enclosing `def` is not part of this
# listing and the original indentation was lost, so the nesting below must
# be restored before the code can run.
# NOTE(review): the early `return` further down exits without reaching
# `f.close()`; a `with open(...)` block would close the file on every path.
f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w")
# Height and width of the input image.
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# Convert the image to RGB here so that grayscale images do not fail
# during prediction. Only RGB prediction is supported; every other
# image type is converted to RGB.
#---------------------------------------------------------#
image = cvtColor(image)
#---------------------------------------------------------#
# Pad the image with gray bars to resize it without distortion.
# A plain resize can be used for recognition as well.
#---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
#---------------------------------------------------------#
# Add the batch_size dimension.
#---------------------------------------------------------#
image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
with torch.no_grad():
images = torch.from_numpy(image_data)
if self.cuda:
images = images.cuda()
#---------------------------------------------------------#
# Feed the image through the network to get predictions.
#---------------------------------------------------------#
outputs = self.net(images)
outputs = self.bbox_util.decode_box(outputs)
#---------------------------------------------------------#
# Stack the predicted boxes, then apply non-maximum suppression.
#---------------------------------------------------------#
results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
# Nothing was detected for this image.
if results[0] is None:
return
# Column 6 is read as the class label, columns 4 and 5 are multiplied into
# the score, and the first four columns are the box.
top_label = np.array(results[0][:, 6], dtype = 'int32')
top_conf = results[0][:, 4] * results[0][:, 5]
top_boxes = results[0][:, :4]
# c is the label predicted by the network; every prediction is written
# into the file under detection-results/.
for i, c in list(enumerate(top_label)):
predicted_class = self.class_names[int(c)]
box = top_boxes[i]
score = str(top_conf[i])
# Boxes are unpacked as (top, left, bottom, right) but written out as
# left, top, right, bottom.
top, left, bottom, right = box
if predicted_class not in class_names:
continue
f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
f.close()
此时我们得到了一些txt文件,里面就是网络预测的结果.
ground-truth
解析xml文件,将gt写入txt文件
# Parse each image's VOC XML annotation and write its ground-truth boxes to
# ground-truth/<image_id>.txt, one "name left top right bottom" line per
# object, with a trailing "difficult" marker for difficult objects.
# NOTE(review): the original indentation was lost in this listing; the
# nesting (in particular whether the final print belongs to the `if`) must
# be restored before the code can run.
map_mode=0
if map_mode == 0 or map_mode == 2:
print("Get ground truth result.")
for image_id in tqdm(image_ids):
with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot()
for obj in root.findall('object'):
# An object is flagged difficult when its <difficult> tag holds 1.
difficult_flag = False
if obj.find('difficult')!=None:
difficult = obj.find('difficult').text
if int(difficult)==1:
difficult_flag = True
obj_name = obj.find('name').text
# Skip objects whose class is not one being evaluated.
if obj_name not in class_names:
continue
bndbox = obj.find('bndbox')
left = bndbox.find('xmin').text
top = bndbox.find('ymin').text
right = bndbox.find('xmax').text
bottom = bndbox.find('ymax').text
if difficult_flag:
new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
else:
new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
print("Get ground truth result done.")
2.2 生成json
得到gt框的json
# Directory layout used by the mAP computation, all rooted at `path`.
path = 'map_out'
GT_PATH = os.path.join(path, 'ground-truth')
DR_PATH = os.path.join(path, 'detection-results')
IMG_PATH = os.path.join(path, 'images-optional')
TEMP_FILES_PATH = os.path.join(path, '.temp_files')
RESULTS_FILES_PATH = os.path.join(path, 'results')

# Create the scratch directory for intermediate JSON files. exist_ok avoids
# the check-then-create race of a separate os.path.exists test.
os.makedirs(TEMP_FILES_PATH, exist_ok=True)

# Collect the ground-truth files in a deterministic (sorted) order.
ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
if not ground_truth_files_list:
    error("Error: No ground-truth files found!")
ground_truth_files_list.sort()

# Number of ground-truth boxes seen for each class name.
gt_counter_per_class = {}
# Keep this function in mind; it is revisited further below.
def voc_ap(rec, prec):
    """Compute VOC-style average precision from a precision/recall curve.

    Mirrors the official VOC2012 MATLAB code::

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
            mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Args:
        rec: Recall values, one per detection, in ranking order.
        prec: Precision values aligned with ``rec``.

    Returns:
        Tuple ``(ap, mrec, mpre)``: the average precision, the recall list
        padded with 0.0 and 1.0, and the padded precision list after it has
        been made monotonically non-increasing.
    """
    # Build padded copies rather than inserting into the arguments, so the
    # caller's lists are left untouched. The leading precision sentinel is
    # 1.0 (not the 0.0 of the MATLAB code); it never enters the AP sum
    # because summation starts at index 1.
    mrec = [0.0, *rec, 1.0]
    mpre = [1.0, *prec, 0.0]

    # Sweep from the end to the beginning so that each precision becomes the
    # maximum of everything to its right (monotonically non-increasing).
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Indices where the recall value changes (MATLAB would use i + 1).
    i_list = [i for i in range(1, len(mrec)) if mrec[i] != mrec[i - 1]]

    # AP is the area under the curve: at every recall change, add the
    # rectangle whose width is the recall step and whose height is the
    # envelope precision at that point.
    ap = 0.0
    for i in i_list:
        ap += (mrec[i] - mrec[i - 1]) * mpre[i]
    return ap, mrec, mpre
def file_lines_to_list(path):
    """Convert the lines of a text file to a list.

    Each line is returned with its surrounding whitespace (including the
    trailing newline) stripped.
    """
    with open(path) as handle:
        raw_lines = handle.readlines()
    return [raw.strip() for raw in raw_lines]
# Read every ground-truth txt file and dump its boxes to a per-image JSON
# file in the temp directory, counting boxes per class along the way.
# NOTE(review): the original indentation was lost in this listing; the
# nesting must be restored before the code can run.
for txt_file in ground_truth_files_list:
file_id = txt_file.split(".txt", 1)[0]
file_id = os.path.basename(os.path.normpath(file_id))
# Every ground-truth file must have a matching detection-results file.
temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
if not os.path.exists(temp_path):
error_msg = "Error. File not found: {}\n".format(temp_path)
error(error_msg)
# Lines of the ground-truth file for this image.
lines_list = file_lines_to_list(txt_file)
bounding_boxes = []
is_difficult = False
for line in lines_list:
# Fast path: the line splits into exactly the expected number of fields.
try:
if "difficult" in line:
class_name, left, top, right, bottom, _difficult = line.split()
is_difficult = True
else:
class_name, left, top, right, bottom = line.split()
# Fallback when the unpacking above fails: take the box fields from the
# end of the line and join whatever precedes them with spaces as the
# class name.
# NOTE(review): the bare `except:` also hides unrelated errors.
except:
if "difficult" in line:
line_split = line.split()
_difficult = line_split[-1]
bottom = line_split[-2]
right = line_split[-3]
top = line_split[-4]
left = line_split[-5]
class_name = ""
for name in line_split[:-5]:
class_name += name + " "
class_name = class_name[:-1]
is_difficult = True
else:
line_split = line.split()
bottom = line_split[-1]
right = line_split[-2]
top = line_split[-3]
left = line_split[-4]
class_name = ""
for name in line_split[:-4]:
class_name += name + " "
class_name = class_name[:-1]
bbox = left + " " + top + " " + right + " " + bottom
if is_difficult:
bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True})
is_difficult = False
else:
bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
# Record how many times this class has appeared.
# NOTE(review): with the indentation lost it is unclear whether this
# count sits inside the `else` branch (skipping difficult boxes) or runs
# for every box; confirm against the original script.
if class_name in gt_counter_per_class:
gt_counter_per_class[class_name] += 1
else:
gt_counter_per_class[class_name] = 1
# Write the boxes of this image to its JSON file.
with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
json.dump(bounding_boxes, outfile)
# The test images must cover every class, otherwise n_classes comes out
# too small.
gt_classes = list(gt_counter_per_class.keys())
gt_classes = sorted(gt_classes)
n_classes = len(gt_classes)
得到每一类预测值的json
# Gather the network's detection files in a deterministic (sorted) order.
dr_files_list = glob.glob(DR_PATH + '/*.txt')
dr_files_list.sort()

# For every class, collect its detections across all images, rank them by
# confidence and dump them to <class_name>_dr.json in the temp directory.
for class_index, class_name in enumerate(gt_classes):
    # All detections of class_name, over every image.
    bounding_boxes = []
    for txt_file in dr_files_list:
        file_id = txt_file.split(".txt", 1)[0]
        file_id = os.path.basename(os.path.normpath(file_id))
        temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
        # Checking once (on the first class) that each detection file has a
        # ground-truth counterpart is enough.
        if class_index == 0:
            if not os.path.exists(temp_path):
                error_msg = "Error. File not found: {}\n".format(temp_path)
                error(error_msg)
        for line in file_lines_to_list(txt_file):
            line_split = line.split()
            if not line_split:
                # Tolerate blank lines instead of crashing on them.
                continue
            if len(line_split) < 5:
                raise ValueError(
                    "malformed detection line in {}: {!r}".format(txt_file, line)
                )
            # The last five fields are always confidence and the box; any
            # fields before them form the (possibly multi-word) class name.
            # This single rule covers both the plain six-field case and the
            # multi-word case, so no bare `except:` fallback is needed.
            confidence, left, top, right, bottom = line_split[-5:]
            tmp_class_name = " ".join(line_split[:-5])
            # Keep only detections that belong to the current class.
            if tmp_class_name == class_name:
                bbox = left + " " + top + " " + right + " " + bottom
                bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox})
    # Highest confidence first, as required by the AP computation.
    bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True)
    with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)
2.3 计算AP
# Per-class AP computation, part 1: label every detection of a class as a
# true positive (TP) or a false positive (FP).
# NOTE(review): the original indentation was lost in this listing; the
# nesting (especially which `if` each `else` pairs with) must be restored
# before the code can run.
sum_AP = 0.0
ap_dictionary = {}
lamr_dictionary = {}
with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file:
results_file.write("# AP and precision/recall per class\n")
count_true_positives = {}
# Compute the AP of each class in turn.
for class_index, class_name in enumerate(gt_classes):
count_true_positives[class_name] = 0
dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
# NOTE(review): the file object passed to json.load is never closed explicitly.
dr_data = json.load(open(dr_file))
# nd is the total number of detections for this class.
nd = len(dr_data)
# Every TP/FP flag starts at 0.
tp = [0] * nd
fp = [0] * nd
score = [0] * nd
score05_idx = 0
# English summary of the string below: for each detection decide whether it
# is a TP. (1) If no ground-truth box overlaps it with IoU > 0.5 it is an
# FP. (2) Otherwise check whether that ground-truth box was already claimed
# by another detection; a ground-truth box can match only one detection, so
# claimed means FP and unclaimed means TP.
'''
对于每个预测值,判断其是否是TP
主要依据:
1.如果没有gt框与其的iou>0.5,那么就是FP
2.如果有iou>0.5,那么看这个gt框有没有被别的预测值占用,因为一个gt框只能匹配一个预测框,占用即FP,没有占用就TP
'''
#
for idx, detection in enumerate(dr_data):
file_id = detection["file_id"]
score[idx] = float(detection["confidence"])
# Remember the index of the latest detection seen with confidence > 0.5.
if score[idx] > 0.5:
score05_idx = idx
gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
ground_truth_data = json.load(open(gt_file))
ovmax = -1
gt_match = -1
# Coordinates of the current detection box.
bb = [float(x) for x in detection["bbox"].split()]
# Loop over the ground-truth boxes and find the one with the largest IoU
# against the current detection.
for obj in ground_truth_data:
if obj["class_name"] == class_name:
# Compute the IoU of the two boxes (stored in ov).
bbgt = [float(x) for x in obj["bbox"].split()]
bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
# The +1 treats coordinates as inclusive pixel indices.
iw = bi[2] - bi[0] + 1
ih = bi[3] - bi[1] + 1
if iw > 0 and ih > 0:
ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
+ 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
# ov is the IoU.
ov = iw * ih / ua
if ov > ovmax:
ovmax = ov
gt_match = obj
min_overlap = MINOVERLAP
if ovmax >= min_overlap:
if "difficult" not in gt_match:
if not bool(gt_match["used"]):
tp[idx] = 1
gt_match["used"] = True
count_true_positives[class_name] += 1
# Rewrite the file to record that this ground-truth box is now claimed.
with open(gt_file, 'w') as f:
f.write(json.dumps(ground_truth_data))
else:
fp[idx] = 1
else:
fp[idx] = 1
TP和FP计算完成,可以先调用库直接算AP
epsilon = 1e-6  # keeps the denominators away from zero
# Turn the per-detection TP/FP flags into tensors.
tparray = torch.Tensor(tp)
fparray = torch.Tensor(fp)
TP_cumsum = torch.cumsum(tparray, dim=0)
FP_cumsum = torch.cumsum(fparray, dim=0)
# Recall and precision after each ranked detection.
recalls = TP_cumsum / (gt_counter_per_class[class_name] + epsilon)
precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
# Anchor the curve at its starting point (recall = 0, precision = 1).
# Prepending recall = 1 / precision = 0 instead makes the x axis jump
# backwards and adds a negative trapezoid to the integral.
precisions = torch.cat((torch.tensor([1.0]), precisions))
recalls = torch.cat((torch.tensor([0.0]), recalls))
# Integrate precision over recall with the trapezoidal rule: trapz(y, x).
ap = torch.trapz(precisions, recalls)
print(f'ave:{ap}')
下面手动计算AP
# Turn the 0/1 false-positive flags into a running count, in place:
# [0, 0, 1, 1, 1, 1] becomes [0, 0, 1, 2, 3, 4], i.e. how many FPs have
# been seen up to and including each detection.
running_total = 0
for pos in range(len(fp)):
    running_total += fp[pos]
    fp[pos] = running_total

# Same running count for the true positives.
running_total = 0
for pos in range(len(tp)):
    running_total += tp[pos]
    tp[pos] = running_total

# Recall after each detection: cumulative TPs over the number of
# ground-truth boxes of this class (at least 1 to avoid dividing by zero).
rec = [
    float(hits) / np.maximum(gt_counter_per_class[class_name], 1)
    for hits in tp
]

# Precision after each detection: cumulative TPs over all detections so far.
prec = [
    float(hits) / np.maximum((misses + hits), 1)
    for hits, misses in zip(tp, fp)
]

# Pass copies so rec and prec stay available for the metrics that follow.
ap, mrec, mprec = voc_ap(rec[:], prec[:])
print(f'ap:{ap}')
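但实际上,使用torch.trapz和使用voc_ap得到的结果并不一样。
原因在于两者的积分方式不同:torch.trapz直接对原始的(recall, precision)点做梯形积分;而voc_ap先把precision修正为单调不增的包络线,再在recall发生变化的位置按矩形面积累加。下面用一个小例子对比两者。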
import numpy as np
import matplotlib.pyplot as plt
def voc_ap(rec, prec):
    """Compute VOC-style average precision, printing each integration step.

    Mirrors the official VOC2012 MATLAB code::

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
            mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Args:
        rec: Recall values, one per detection, in ranking order.
        prec: Precision values aligned with ``rec``.

    Returns:
        Tuple ``(ap, mrec, mpre)``: the average precision, the recall list
        padded with 0.0 and 1.0, and the padded precision list after it has
        been made monotonically non-increasing.
    """
    # Build padded copies rather than inserting into the arguments, so the
    # caller's lists (which this demo plots afterwards) are left untouched.
    # The leading precision sentinel is 1.0 (not the 0.0 of the MATLAB
    # code); it never enters the AP sum because summation starts at index 1.
    mrec = [0.0, *rec, 1.0]
    mpre = [1.0, *prec, 0.0]

    # Sweep from the end to the beginning so that each precision becomes the
    # maximum of everything to its right (monotonically non-increasing).
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Indices where the recall value changes (MATLAB would use i + 1).
    i_list = []
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            i_list.append(i)
            print(mrec[i])

    # AP is the area under the curve: at every recall change, add the
    # rectangle whose width is the recall step and whose height is the
    # envelope precision at that point.
    ap = 0.0
    for i in i_list:
        ap += ((mrec[i] - mrec[i - 1]) * mpre[i])
        print(ap, mrec[i], mrec[i - 1], mpre[i])
    return ap, mrec, mpre
# Toy precision/recall points: x holds the recall values, y the precisions.
x = [0.1, 0.1, 0.2, 0.2, 0.3, 0.3]
y = [0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

# Area under the raw points using the trapezoidal rule.
res = np.trapz(y, x)
print(res)

# Area according to the VOC definition of average precision.
res2 = voc_ap(x, y)
print(res2)

# Show the curve that both methods integrate.
plt.plot(x, y)
plt.show()
np.trapz的结果为0.07
voc_ap的结果为0.12(返回元组中的第一个元素)
# Continuing from the AP computation: derive F1 and report the per-class
# metrics.
rec_arr = np.array(rec)
prec_arr = np.array(prec)
pr_sum = prec_arr + rec_arr
# A zero denominator is replaced by 1 so that F1 is 0 there.
F1 = rec_arr * prec_arr * 2 / np.where(pr_sum == 0, 1, pr_sum)

sum_AP += ap
text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP "

# Metrics are read at score05_idx, the index recorded for confidence > 0.5.
has_detections = len(prec) > 0
if has_detections:
    f1_at_05 = F1[score05_idx]
    recall_at_05 = rec[score05_idx]
    precision_at_05 = prec[score05_idx]
    F1_text = "{0:.2f}".format(f1_at_05) + " = " + class_name + " F1 "
    Recall_text = "{0:.2f}%".format(recall_at_05*100) + " = " + class_name + " Recall "
    Precision_text = "{0:.2f}%".format(precision_at_05*100) + " = " + class_name + " Precision "
else:
    F1_text = "0.00" + " = " + class_name + " F1 "
    Recall_text = "0.00%" + " = " + class_name + " Recall "
    Precision_text = "0.00%" + " = " + class_name + " Precision "

# Write the whole precision/recall history of this class to the results file.
rounded_prec = ['%.2f' % value for value in prec]
rounded_rec = ['%.2f' % value for value in rec]
results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")

if has_detections:
    print(text + "\t||\tscore_threhold=0.5 : " + "F1=" + "{0:.2f}".format(f1_at_05)
          + " ; Recall=" + "{0:.2f}%".format(recall_at_05*100)
          + " ; Precision=" + "{0:.2f}%".format(precision_at_05*100))
else:
    print(text + "\t||\tscore_threhold=0.5 : F1=0.00% ; Recall=0.00% ; Precision=0.00%")
ap_dictionary[class_name] = ap