一、预测过程
letterbox_image:为了防止图像失真,不直接进行简单的resize,而是先保持宽高比将图片等比例缩放(使用双三次插值 BICUBIC),再创建一张目标尺寸(例如300*300)的灰色图片,把缩放后的图片粘贴到灰色图片的中央,相当于在边缘加上灰条。
def letterbox_image(image, size):
    """Fit *image* into a canvas of *size* without changing its aspect ratio.

    The image is scaled by a single factor so that it fits entirely inside
    the target canvas, then pasted onto a grey background so that the
    leftover area appears as grey bars split between opposite edges.

    Args:
        image: Object exposing ``size`` (unpacked as width, height),
            ``resize`` and usable with ``Image.paste`` -- presumably a
            PIL image.
        size: Target ``(width, height)`` of the returned canvas.

    Returns:
        A new RGB image of dimensions *size* containing the scaled input.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # The smaller of the two ratios guarantees both sides fit in the canvas;
    # at least one scaled side ends up (up to truncation) equal to the target.
    ratio = min(dst_w / src_w, dst_h / src_h)
    fit_w, fit_h = int(src_w * ratio), int(src_h * ratio)

    # Bicubic interpolation for the rescale.
    resized = image.resize((fit_w, fit_h), Image.BICUBIC)

    # Grey (128, 128, 128) background of the requested size.
    canvas = Image.new('RGB', size, (128, 128, 128))

    # Top-left corner that leaves equal padding on opposite sides.
    offset = ((dst_w - fit_w) // 2, (dst_h - fit_h) // 2)
    canvas.paste(resized, offset)
    return canvas
将匹配到的真实框相对于先验框进行编码,得到网络需要学习的偏移量
def encode(matched, priors, variances):
    """Encode matched boxes as offsets relative to their prior boxes.

    ``matched`` is treated as corner-form boxes (columns 0-1 and 2-3 are
    averaged to get a centre and subtracted to get a size), while ``priors``
    is treated as centre-size boxes (columns 0-1 are centres, columns 2-3
    are sizes).

    Args:
        matched: Tensor with one box per row, 4 columns.
        priors: Tensor with one prior per row, 4 columns, same row count
            as ``matched``.
        variances: Indexable of two scale factors; ``variances[0]`` scales
            the centre offsets and ``variances[1]`` scales the log size
            ratios.

    Returns:
        Tensor with 4 columns per row: the scaled centre offsets followed
        by the scaled log size ratios.
    """
    box_min, box_max = matched[:, :2], matched[:, 2:]
    prior_center, prior_size = priors[:, :2], priors[:, 2:]

    # Distance between box centre and prior centre, normalised by the
    # prior size and the first variance term.
    center_offset = (box_min + box_max) / 2 - prior_center
    center_offset.div_(variances[0] * prior_size)

    # Log of the box-to-prior size ratio, scaled by the second variance term.
    size_ratio = (box_max - box_min) / prior_size
    log_size = torch.log(size_ratio) / variances[1]

    return torch.cat([center_offset, log_size], 1)
计算所有的先验框和真实框的重合程度
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Assign a ground-truth box and class label to every prior box.

    The encoded localisation targets and the class targets for one sample
    are written into ``loc_t[idx]`` and ``conf_t[idx]``; nothing is returned.

    Args:
        threshold: Overlap below which a prior is labelled background (0).
        truths: Ground-truth boxes, one per row; passed to ``jaccard``
            together with ``point_form(priors)``.
        priors: Prior boxes, one per row.
        variances: Variance terms forwarded to ``encode``.
        labels: Class label per ground-truth box; stored shifted by +1 so
            that 0 can denote background.
        loc_t: Output container receiving the encoded offsets at ``idx``.
        conf_t: Output container receiving the class targets at ``idx``.
        idx: Index of the current sample in ``loc_t`` / ``conf_t``.
    """
    # Overlap table: rows are ground-truth boxes, columns are priors.
    iou = jaccard(truths, point_form(priors))

    # For each ground-truth box, the prior that overlaps it the most.
    _, prior_for_truth = iou.max(1)
    # For each prior, the ground-truth box it overlaps the most.
    truth_iou, truth_for_prior = iou.max(0)

    # Guarantee every ground-truth box keeps its best prior: overwrite the
    # overlap of those priors with 2 (presumably above any real overlap and
    # any threshold) so the background filter below never discards them.
    truth_iou.index_fill_(0, prior_for_truth, 2)

    # Point each of those priors at the truth that claimed it. Done
    # sequentially so that, if several truths share a prior, the last wins.
    for truth_i, prior_i in enumerate(prior_for_truth):
        truth_for_prior[prior_i] = truth_i

    # Gather the matched box and label for every prior.
    matched_boxes = truths[truth_for_prior]
    conf = labels[truth_for_prior] + 1
    # Priors whose best overlap is under the threshold become background.
    conf[truth_iou < threshold] = 0

    loc_t[idx] = encode(matched_boxes, priors, variances)
    conf_t[idx] = conf
检测图片
def detect_image(self, image):
image_shape = np.array(np.shape(image)[0:2])
# letterbox_image为了防止失帧,不是简单resize,而是在边缘加上灰条
crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1])))
photo = np.array(crop_img,dtype = np.float64)
# 图片预处理,归一化
with torch.no_grad():
# 从每个图像通道中减去给定的均值,torch中是BGR,transpose转换一下
photo = torch.from_numpy(np.expand_dims(np.transpose(crop_img-MEANS,(2,0,1)),0)).type(torch.FloatTensor)
if self.cuda:
photo = photo.cuda()
preds = self.net(photo) # 把photo传入net中得到预测结果
top_conf = []
top_label = []
top_bboxes = []
for i in range(preds.size(1)): # pred.size = (1,21,200,5),遍历21个类
j = 0
while preds[0, i,