目标检测和边界框
% matplotlib inline
from PIL import Image
import sys
sys. path. append( '/home/input/' )
import d2lzh1981 as d2l
d2l. set_figsize( )
img = Image. open ( '/home/input/img2083/img/catdog.jpg' )
d2l. plt. imshow( img) ;
边界框
dog_bbox, cat_bbox = [ 60 , 45 , 378 , 516 ] , [ 400 , 112 , 655 , 493 ]
def bbox_to_rect ( bbox, color) :
return d2l. plt. Rectangle(
xy= ( bbox[ 0 ] , bbox[ 1 ] ) , width= bbox[ 2 ] - bbox[ 0 ] , height= bbox[ 3 ] - bbox[ 1 ] ,
fill= False , edgecolor= color, linewidth= 2 )
fig = d2l. plt. imshow( img)
fig. axes. add_patch( bbox_to_rect( dog_bbox, 'blue' ) )
fig. axes. add_patch( bbox_to_rect( cat_bbox, 'red' ) ) ;
锚框
import numpy as np
import math
import torch
import os
IMAGE_DIR = '/home/input/img2083/img/'
print ( torch. __version__)
生成多个锚框
d2l. set_figsize( )
img = Image. open ( os. path. join( IMAGE_DIR, 'catdog.jpg' ) )
w, h = img. size
print ( "w = %d, h = %d" % ( w, h) )
def MultiBoxPrior ( feature_map, sizes= [ 0.75 , 0.5 , 0.25 ] , ratios= [ 1 , 2 , 0.5 ] ) :
"""
# 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成(xmin, ymin, xmax, ymax).
https://zh.d2l.ai/chapter_computer-vision/anchor.html
Args:
feature_map: torch tensor, Shape: [N, C, H, W].
sizes: List of sizes (0~1) of generated MultiBoxPriores.
ratios: List of aspect ratios (non-negative) of generated MultiBoxPriores.
Returns:
anchors of shape (1, num_anchors, 4). 由于batch里每个都一样, 所以第一维为1
"""
pairs = [ ]
for r in ratios:
pairs. append( [ sizes[ 0 ] , math. sqrt( r) ] )
for s in sizes[ 1 : ] :
pairs. append( [ s, math. sqrt( ratios[ 0 ] ) ] )
pairs = np. array( pairs)
ss1 = pairs[ : , 0 ] * pairs[ : , 1 ]
ss2 = pairs[ : , 0 ] / pairs[ : , 1 ]
base_anchors = np. stack( [ - ss1, - ss2, ss1, ss2] , axis= 1 ) / 2
h, w = feature_map. shape[ - 2 : ]
shifts_x = np. arange( 0 , w) / w
shifts_y = np. arange( 0 , h) / h
shift_x, shift_y = np. meshgrid( shifts_x, shifts_y)
shift_x = shift_x. reshape( - 1 )
shift_y = shift_y. reshape( - 1 )
shifts = np. stack( ( shift_x, shift_y, shift_x, shift_y) , axis= 1 )
anchors = shifts. reshape( ( - 1 , 1 , 4 ) ) + base_anchors. reshape( ( 1 , - 1 , 4 ) )
return torch. tensor( anchors, dtype= torch. float32) . view( 1 , - 1 , 4 )
X = torch. Tensor( 1 , 3 , h, w)
Y = MultiBoxPrior( X, sizes= [ 0.75 , 0.5 , 0.25 ] , ratios= [ 1 , 2 , 0.5 ] )
Y. shape
boxes = Y. reshape( ( h, w, 5 , 4 ) )
boxes[ 250 , 250 , 0 , : ]
def show_bboxes ( axes, bboxes, labels= None , colors= None ) :
def _make_list ( obj, default_values= None ) :
if obj is None :
obj = default_values
elif not isinstance ( obj, ( list , tuple ) ) :
obj = [ obj]
return obj
labels = _make_list( labels)
colors = _make_list( colors, [ 'b' , 'g' , 'r' , 'm' , 'c' ] )
for i, bbox in enumerate ( bboxes) :
color = colors[ i % len ( colors) ]
rect = d2l. bbox_to_rect( bbox. detach( ) . cpu( ) . numpy( ) , color)
axes. add_patch( rect)
if labels and len ( labels) > i:
text_color = 'k' if color == 'w' else 'w'
axes. text( rect. xy[ 0 ] , rect. xy[ 1 ] , labels[ i] ,
va= 'center' , ha= 'center' , fontsize= 6 , color= text_color,
bbox= dict ( facecolor= color, lw= 0 ) )
d2l. set_figsize( )
fig = d2l. plt. imshow( img)
bbox_scale = torch. tensor( [ [ w, h, w, h] ] , dtype= torch. float32)
show_bboxes( fig. axes, boxes[ 250 , 250 , : , : ] * bbox_scale,
[ 's=0.75, r=1' , 's=0.75, r=2' , 's=0.75, r=0.5' , 's=0.5, r=1' , 's=0.25, r=1' ] )
交并比
def compute_intersection ( set_1, set_2) :
"""
计算anchor之间的交集
Args:
set_1: a tensor of dimensions (n1, 4), anchor表示成(xmin, ymin, xmax, ymax)
set_2: a tensor of dimensions (n2, 4), anchor表示成(xmin, ymin, xmax, ymax)
Returns:
intersection of each of the boxes in set 1 with respect to each of the boxes in set 2, shape: (n1, n2)
"""
lower_bounds = torch. max ( set_1[ : , : 2 ] . unsqueeze( 1 ) , set_2[ : , : 2 ] . unsqueeze( 0 ) )
upper_bounds = torch. min ( set_1[ : , 2 : ] . unsqueeze( 1 ) , set_2[ : , 2 : ] . unsqueeze( 0 ) )
intersection_dims = torch. clamp( upper_bounds - lower_bounds, min = 0 )
return intersection_dims[ : , : , 0 ] * intersection_dims[ : , : , 1 ]
def compute_jaccard ( set_1, set_2) :
"""
计算anchor之间的Jaccard系数(IoU)
Args:
set_1: a tensor of dimensions (n1, 4), anchor表示成(xmin, ymin, xmax, ymax)
set_2: a tensor of dimensions (n2, 4), anchor表示成(xmin, ymin, xmax, ymax)
Returns:
Jaccard Overlap of each of the boxes in set 1 with respect to each of the boxes in set 2, shape: (n1, n2)
"""
intersection = compute_intersection( set_1, set_2)
areas_set_1 = ( set_1[ : , 2 ] - set_1[ : , 0 ] ) * ( set_1[ : , 3 ] - set_1[ : , 1 ] )
areas_set_2 = ( set_2[ : , 2 ] - set_2[ : , 0 ] ) * ( set_2[ : , 3 ] - set_2[ : , 1 ] )
union = areas_set_1. unsqueeze( 1 ) + areas_set_2. unsqueeze( 0 ) - intersection
return intersection / union
标注训练集的锚框
bbox_scale = torch. tensor( ( w, h, w, h) , dtype= torch. float32)
ground_truth = torch. tensor( [ [ 0 , 0.1 , 0.08 , 0.52 , 0.92 ] ,
[ 1 , 0.55 , 0.2 , 0.9 , 0.88 ] ] )
anchors = torch. tensor( [ [ 0 , 0.1 , 0.2 , 0.3 ] , [ 0.15 , 0.2 , 0.4 , 0.4 ] ,
[ 0.63 , 0.05 , 0.88 , 0.98 ] , [ 0.66 , 0.45 , 0.8 , 0.8 ] ,
[ 0.57 , 0.3 , 0.92 , 0.9 ] ] )
fig = d2l. plt. imshow( img)
show_bboxes( fig. axes, ground_truth[ : , 1 : ] * bbox_scale, [ 'dog' , 'cat' ] , 'k' )
show_bboxes( fig. axes, anchors * bbox_scale, [ '0' , '1' , '2' , '3' , '4' ] ) ;
compute_jaccard( anchors, ground_truth[ : , 1 : ] )
def assign_anchor ( bb, anchor, jaccard_threshold= 0.5 ) :
"""
# 按照「9.4.1. 生成多个锚框」图9.3所讲为每个anchor分配真实的bb, anchor表示成归一化(xmin, ymin, xmax, ymax).
https://zh.d2l.ai/chapter_computer-vision/anchor.html
Args:
bb: 真实边界框(bounding box), shape:(nb, 4)
anchor: 待分配的anchor, shape:(na, 4)
jaccard_threshold: 预先设定的阈值
Returns:
assigned_idx: shape: (na, ), 每个anchor分配的真实bb对应的索引, 若未分配任何bb则为-1
"""
na = anchor. shape[ 0 ]
nb = bb. shape[ 0 ]
jaccard = compute_jaccard( anchor, bb) . detach( ) . cpu( ) . numpy( )
assigned_idx = np. ones( na) * - 1
jaccard_cp = jaccard. copy( )
for j in range ( nb) :
i = np. argmax( jaccard_cp[ : , j] )
assigned_idx[ i] = j
jaccard_cp[ i, : ] = float ( "-inf" )
for i in range ( na) :
if assigned_idx[ i] == - 1 :
j = np. argmax( jaccard[ i, : ] )
if jaccard[ i, j] >= jaccard_threshold:
assigned_idx[ i] = j
return torch. tensor( assigned_idx, dtype= torch. long )
def xy_to_cxcy ( xy) :
"""
将(x_min, y_min, x_max, y_max)形式的anchor转换成(center_x, center_y, w, h)形式的.
https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection/blob/master/utils.py
Args:
xy: bounding boxes in boundary coordinates, a tensor of size (n_boxes, 4)
Returns:
bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4)
"""
return torch. cat( [ ( xy[ : , 2 : ] + xy[ : , : 2 ] ) / 2 ,
xy[ : , 2 : ] - xy[ : , : 2 ] ] , 1 )
def MultiBoxTarget ( anchor, label) :
"""
# 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成归一化(xmin, ymin, xmax, ymax).
https://zh.d2l.ai/chapter_computer-vision/anchor.html
Args:
anchor: torch tensor, 输入的锚框, 一般是通过MultiBoxPrior生成, shape:(1,锚框总数,4)
label: 真实标签, shape为(bn, 每张图片最多的真实锚框数, 5)
第二维中,如果给定图片没有这么多锚框, 可以先用-1填充空白, 最后一维中的元素为[类别标签, 四个坐标值]
Returns:
列表, [bbox_offset, bbox_mask, cls_labels]
bbox_offset: 每个锚框的标注偏移量,形状为(bn,锚框总数*4)
bbox_mask: 形状同bbox_offset, 每个锚框的掩码, 一一对应上面的偏移量, 负类锚框(背景)对应的掩码均为0, 正类锚框的掩码均为1
cls_labels: 每个锚框的标注类别, 其中0表示为背景, 形状为(bn,锚框总数)
"""
assert len ( anchor. shape) == 3 and len ( label. shape) == 3
bn = label. shape[ 0 ]
def MultiBoxTarget_one ( anc, lab, eps= 1e - 6 ) :
"""
MultiBoxTarget函数的辅助函数, 处理batch中的一个
Args:
anc: shape of (锚框总数, 4)
lab: shape of (真实锚框数, 5), 5代表[类别标签, 四个坐标值]
eps: 一个极小值, 防止log0
Returns:
offset: (锚框总数*4, )
bbox_mask: (锚框总数*4, ), 0代表背景, 1代表非背景
cls_labels: (锚框总数, 4), 0代表背景
"""
an = anc. shape[ 0 ]
assigned_idx = assign_anchor( lab[ : , 1 : ] , anc)
print ( "a: " , assigned_idx. shape)
print ( assigned_idx)
bbox_mask = ( ( assigned_idx >= 0 ) . float ( ) . unsqueeze( - 1 ) ) . repeat( 1 , 4 )
print ( "b: " , bbox_mask. shape)
print ( bbox_mask)
cls_labels = torch. zeros( an, dtype= torch. long )
assigned_bb = torch. zeros( ( an, 4 ) , dtype= torch. float32)
for i in range ( an) :
bb_idx = assigned_idx[ i]
if bb_idx >= 0 :
cls_labels[ i] = lab[ bb_idx, 0 ] . long ( ) . item( ) + 1
assigned_bb[ i, : ] = lab[ bb_idx, 1 : ]
center_anc = xy_to_cxcy( anc)
center_assigned_bb = xy_to_cxcy( assigned_bb)
offset_xy = 10.0 * ( center_assigned_bb[ : , : 2 ] - center_anc[ : , : 2 ] ) / center_anc[ : , 2 : ]
offset_wh = 5.0 * torch. log( eps + center_assigned_bb[ : , 2 : ] / center_anc[ : , 2 : ] )
offset = torch. cat( [ offset_xy, offset_wh] , dim = 1 ) * bbox_mask
return offset. view( - 1 ) , bbox_mask. view( - 1 ) , cls_labels
batch_offset = [ ]
batch_mask = [ ]
batch_cls_labels = [ ]
for b in range ( bn) :
offset, bbox_mask, cls_labels = MultiBoxTarget_one( anchor[ 0 , : , : ] , label[ b, : , : ] )
batch_offset. append( offset)
batch_mask. append( bbox_mask)
batch_cls_labels. append( cls_labels)
bbox_offset = torch. stack( batch_offset)
bbox_mask = torch. stack( batch_mask)
cls_labels = torch. stack( batch_cls_labels)
return [ bbox_offset, bbox_mask, cls_labels]
labels = MultiBoxTarget( anchors. unsqueeze( dim= 0 ) ,
ground_truth. unsqueeze( dim= 0 ) )
输出预测边界框
anchors = torch. tensor( [ [ 0.1 , 0.08 , 0.52 , 0.92 ] , [ 0.08 , 0.2 , 0.56 , 0.95 ] ,
[ 0.15 , 0.3 , 0.62 , 0.91 ] , [ 0.55 , 0.2 , 0.9 , 0.88 ] ] )
offset_preds = torch. tensor( [ 0.0 ] * ( 4 * len ( anchors) ) )
cls_probs = torch. tensor( [ [ 0 . , 0 . , 0 . , 0 . , ] ,
[ 0.9 , 0.8 , 0.7 , 0.1 ] ,
[ 0.1 , 0.2 , 0.3 , 0.9 ] ] )
fig = d2l. plt. imshow( img)
show_bboxes( fig. axes, anchors * bbox_scale,
[ 'dog=0.9' , 'dog=0.8' , 'dog=0.7' , 'cat=0.9' ] )
from collections import namedtuple
Pred_BB_Info = namedtuple( "Pred_BB_Info" , [ "index" , "class_id" , "confidence" , "xyxy" ] )
def non_max_suppression ( bb_info_list, nms_threshold = 0.5 ) :
"""
非极大抑制处理预测的边界框
Args:
bb_info_list: Pred_BB_Info的列表, 包含预测类别、置信度等信息
nms_threshold: 阈值
Returns:
output: Pred_BB_Info的列表, 只保留过滤后的边界框信息
"""
output = [ ]
sorted_bb_info_list = sorted ( bb_info_list, key = lambda x: x. confidence, reverse= True )
while len ( sorted_bb_info_list) != 0 :
best = sorted_bb_info_list. pop( 0 )
output. append( best)
if len ( sorted_bb_info_list) == 0 :
break
bb_xyxy = [ ]
for bb in sorted_bb_info_list:
bb_xyxy. append( bb. xyxy)
iou = compute_jaccard( torch. tensor( [ best. xyxy] ) ,
torch. tensor( bb_xyxy) ) [ 0 ]
n = len ( sorted_bb_info_list)
sorted_bb_info_list = [ sorted_bb_info_list[ i] for i in range ( n) if iou[ i] <= nms_threshold]
return output
def MultiBoxDetection ( cls_prob, loc_pred, anchor, nms_threshold = 0.5 ) :
"""
# 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成归一化(xmin, ymin, xmax, ymax).
https://zh.d2l.ai/chapter_computer-vision/anchor.html
Args:
cls_prob: 经过softmax后得到的各个锚框的预测概率, shape:(bn, 预测总类别数+1, 锚框个数)
loc_pred: 预测的各个锚框的偏移量, shape:(bn, 锚框个数*4)
anchor: MultiBoxPrior输出的默认锚框, shape: (1, 锚框个数, 4)
nms_threshold: 非极大抑制中的阈值
Returns:
所有锚框的信息, shape: (bn, 锚框个数, 6)
每个锚框信息由[class_id, confidence, xmin, ymin, xmax, ymax]表示
class_id=-1 表示背景或在非极大值抑制中被移除了
"""
assert len ( cls_prob. shape) == 3 and len ( loc_pred. shape) == 2 and len ( anchor. shape) == 3
bn = cls_prob. shape[ 0 ]
def MultiBoxDetection_one ( c_p, l_p, anc, nms_threshold = 0.5 ) :
"""
MultiBoxDetection的辅助函数, 处理batch中的一个
Args:
c_p: (预测总类别数+1, 锚框个数)
l_p: (锚框个数*4, )
anc: (锚框个数, 4)
nms_threshold: 非极大抑制中的阈值
Return:
output: (锚框个数, 6)
"""
pred_bb_num = c_p. shape[ 1 ]
anc = ( anc + l_p. view( pred_bb_num, 4 ) ) . detach( ) . cpu( ) . numpy( )
confidence, class_id = torch. max ( c_p, 0 )
confidence = confidence. detach( ) . cpu( ) . numpy( )
class_id = class_id. detach( ) . cpu( ) . numpy( )
pred_bb_info = [ Pred_BB_Info(
index = i,
class_id = class_id[ i] - 1 ,
confidence = confidence[ i] ,
xyxy= [ * anc[ i] ] )
for i in range ( pred_bb_num) ]
obj_bb_idx = [ bb. index for bb in non_max_suppression( pred_bb_info, nms_threshold) ]
output = [ ]
for bb in pred_bb_info:
output. append( [
( bb. class_id if bb. index in obj_bb_idx else - 1.0 ) ,
bb. confidence,
* bb. xyxy
] )
return torch. tensor( output)
batch_output = [ ]
for b in range ( bn) :
batch_output. append( MultiBoxDetection_one( cls_prob[ b] , loc_pred[ b] , anchor[ 0 ] , nms_threshold) )
return torch. stack( batch_output)
output = MultiBoxDetection(
cls_probs. unsqueeze( dim= 0 ) , offset_preds. unsqueeze( dim= 0 ) ,
anchors. unsqueeze( dim= 0 ) , nms_threshold= 0.5 )
output
fig = d2l. plt. imshow( img)
for i in output[ 0 ] . detach( ) . cpu( ) . numpy( ) :
if i[ 0 ] == - 1 :
continue
label = ( 'dog=' , 'cat=' ) [ int ( i[ 0 ] ) ] + str ( i[ 1 ] )
show_bboxes( fig. axes, [ torch. tensor( i[ 2 : ] ) * bbox_scale] , label)