# ************************** (earlier content omitted) **************************
# Steps performed earlier in the file (not shown here):
#   1) pre-process the network input image
#   2) define the network
#   3) restore the model parameters
#   4) define the default anchors
# ------------------------- Pre-processing of the input image -------------------------


# Clip bboxes against a reference bbox.
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes with respect to a reference bbox.

    Coordinates 0 and 1 of every box are raised to at least the reference's
    coordinates 0 and 1; coordinates 2 and 3 are lowered to at most the
    reference's coordinates 2 and 3.

    Args:
        bbox_ref: Reference box, indexable by coordinate after transposition
            (e.g. a length-4 array).
        bboxes: Array of boxes with the 4 coordinates on the last axis.

    Returns:
        A clipped copy of `bboxes`; the input array is not modified.
    """
    # Work on a transposed copy so that each coordinate is one row.
    clipped = np.transpose(np.copy(bboxes))
    ref = np.transpose(bbox_ref)
    clipped[0] = np.maximum(clipped[0], ref[0])
    clipped[1] = np.maximum(clipped[1], ref[1])
    clipped[2] = np.minimum(clipped[2], ref[2])
    clipped[3] = np.minimum(clipped[3], ref[3])
    return np.transpose(clipped)


# Keep only the top_k highest-scoring bboxes.
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort detections by decreasing score and keep only the first `top_k`.

    Args:
        classes: Array of class ids, one per detection.
        scores: Array of scores, one per detection.
        bboxes: Array of boxes, one row per detection.
        top_k: Maximum number of detections to keep.

    Returns:
        Tuple `(classes, scores, bboxes)` reordered by decreasing score and
        truncated to at most `top_k` entries.
    """
    # Negating the scores turns argsort's ascending order into descending.
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]


# NMS (filter by overlap threshold).
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum suppression to bounding boxes.

    A box is dropped when an earlier kept box of the same class overlaps it
    by `nms_threshold` or more, the overlap being whatever `bboxes_jaccard`
    (defined elsewhere) returns. Earlier boxes take precedence, so callers
    presumably pass detections already sorted by decreasing score.

    Args:
        classes: Array of class ids, one per detection.
        scores: Array of scores, one per detection.
        bboxes: Array of boxes, one row per detection.
        nms_threshold: Overlap at or above which a later same-class box is
            suppressed.

    Returns:
        Tuple `(classes, scores, bboxes)` restricted to the kept detections.
    """
    # Builtin `bool` instead of `np.bool`: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep_bboxes[i]:
            # Compute overlap with the bboxes which are following.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i + 1):])
            # Keep a following box if it overlaps little or is another class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                                   keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
# Resize bboxes (rescale them relative to a reference box).
def bboxes_resize(bbox_ref, bboxes):
    """Express bounding boxes in the coordinate frame of a reference box.

    Each box is translated by the reference box's first two coordinates and
    scaled by the reference box's extent, so that the reference box itself
    would map to [0, 0, 1, 1].

    Args:
        bbox_ref: Reference box; indices 0..3 are read.
        bboxes: 2-D array of boxes, one row of 4 coordinates per box.

    Returns:
        A rescaled copy of `bboxes`; the input array is not modified.
    """
    rescaled = np.copy(bboxes)
    # Extent of the reference box along its two axes.
    extent = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]
    for col in range(4):
        # Columns 0 and 2 use axis 0 of the reference, columns 1 and 3 axis 1.
        axis = col % 2
        rescaled[:, col] -= bbox_ref[axis]
        rescaled[:, col] /= extent[axis]
    return rescaled
# Extract classes, scores and bboxes from a single layer.
def ssd_bboxes_select_layer(predictions_layer, localizations_layer, anchors_layer, select_threshold=0.5, img_shape=(300, 300), num_classes=21, decode=True):
    """Select detections from the outputs of one feature layer.

    Args:
        predictions_layer: Class scores with the classes on the last axis;
            a 5-D input is treated as batched along its first axis, anything
            else as a single batch.
        localizations_layer: Box features with the coordinates on the last
            axis.
        anchors_layer: Anchors passed to `ssd_bboxes_decode` when `decode`
            is true; unused otherwise.
        select_threshold: Score threshold. When None or 0, the best class of
            every position is used and positions whose best class is 0 are
            dropped; otherwise every (position, class > 0) pair scoring above
            the threshold is returned.
        img_shape: Unused here.
        num_classes: Unused here.
        decode: Whether to decode the localizations first.

    Return:
        classes, scores, bboxes: Numpy arrays...
    """
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)

    # Flatten both inputs to: Batches x N x (N_labels | 4).
    pred_shape = predictions_layer.shape
    n_batch = pred_shape[0] if len(pred_shape) == 5 else 1
    flat_preds = np.reshape(predictions_layer, (n_batch, -1, pred_shape[-1]))
    loc_shape = localizations_layer.shape
    flat_locs = np.reshape(localizations_layer, (n_batch, -1, loc_shape[-1]))

    if select_threshold is not None and select_threshold != 0:
        # Threshold every non-background class score independently.
        foreground = flat_preds[:, :, 1:]
        hits = np.where(foreground > select_threshold)
        # The last index is the class offset inside `foreground`; +1 restores
        # the original class id since class 0 was sliced away.
        return hits[-1] + 1, foreground[hits], flat_locs[hits[:-1]]

    # No threshold: keep the best class per position unless it is class 0.
    best_class = np.argmax(flat_preds, axis=2)
    best_score = np.amax(flat_preds, axis=2)
    keep = best_class > 0
    return best_class[keep], best_score[keep], flat_locs[keep]
# Gather classes, scores and bboxes from all output layers.
def ssd_bboxes_select(predictions_net, localizations_net, anchors_net, select_threshold=0.5, img_shape=(300, 300), num_classes=21, decode=True):
    """Select detections from every output layer and concatenate them.

    `ssd_bboxes_select_layer` is called once per entry of `predictions_net`
    with the entries of `localizations_net` and `anchors_net` at the same
    index; the remaining arguments are forwarded unchanged.

    Return:
        classes, scores, bboxes: Numpy arrays...
    """
    per_layer = []
    for layer_idx, layer_predictions in enumerate(predictions_net):
        per_layer.append(ssd_bboxes_select_layer(
            layer_predictions,
            localizations_net[layer_idx],
            anchors_net[layer_idx],
            select_threshold,
            img_shape,
            num_classes,
            decode))

    # Stitch the per-layer results together along the detection axis.
    classes = np.concatenate([layer[0] for layer in per_layer], 0)
    scores = np.concatenate([layer[1] for layer in per_layer], 0)
    bboxes = np.concatenate([layer[2] for layer in per_layer], 0)
    return classes, scores, bboxes
# Process one input image.
# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on `img` and post-process its detections.

    The session `isess`, the graph tensors, `ssd_anchors` and the
    `np_methods` module are all taken from the enclosing module.

    Args:
        img: Image fed to the `img_input` placeholder.
        select_threshold: Score threshold forwarded to the box selection.
        nms_threshold: Overlap threshold forwarded to the NMS step.
        net_shape: Forwarded to the box selection as `img_shape`.

    Returns:
        Tuple `(classes, scores, bboxes)` after selection, clipping, top-400
        sorting, NMS and resizing.
    """
    # Run SSD network; the first fetched value (the 4-D image) is not used.
    fetches = [image_4d, predictions, localisations, bbox_img]
    _, net_predictions, net_localisations, img_bbox = isess.run(
        fetches, feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    det_classes, det_scores, det_bboxes = np_methods.ssd_bboxes_select(
        net_predictions, net_localisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=15, decode=True)

    det_bboxes = np_methods.bboxes_clip(img_bbox, det_bboxes)
    det_classes, det_scores, det_bboxes = np_methods.bboxes_sort(
        det_classes, det_scores, det_bboxes, top_k=400)
    det_classes, det_scores, det_bboxes = np_methods.bboxes_nms(
        det_classes, det_scores, det_bboxes, nms_threshold=nms_threshold)

    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    det_bboxes = np_methods.bboxes_resize(img_bbox, det_bboxes)
    return det_classes, det_scores, det_bboxes
# ------------------------- Pre-processing of the input image -------------------------
# Draw the detection boxes.
def bboxes_draw_on_img(img, classes, scores, bboxes, colors=None, thickness=2):
    """Draw labelled detection boxes onto `img` in place.

    Box coordinates are multiplied by the image height (indices 0 and 2) and
    width (indices 1 and 3) before drawing, so they are expected to be
    relative to the image size.

    Args:
        img: Image array; modified in place through OpenCV.
        classes: Class id per detection; used as the colour key and passed
            to `num2class` to build the label.
        scores: Score per detection, printed with three decimals.
        bboxes: Array with one row of 4 coordinates per detection.
        colors: Optional mapping from class id to colour tuple. Classes
            missing from it are assigned (0, 0, 255); a mapping supplied by
            the caller is updated with those entries.
        thickness: Rectangle line thickness.
    """
    # A fresh dict per call: the previous `colors=dict()` default was a
    # single object shared (and mutated) across every call.
    if colors is None:
        colors = {}
    height, width = img.shape[0], img.shape[1]
    for i, bbox in enumerate(bboxes):
        color = colors.setdefault(classes[i], (0, 0, 255))
        # Points are computed as (row, col) and reversed for OpenCV, which
        # takes (x, y).
        p1 = (int(bbox[0] * height), int(bbox[1] * width))
        p2 = (int(bbox[2] * height), int(bbox[3] * width))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        label = '%s/%.3f' % (num2class(classes[i]), scores[i])
        # Place the label 5 pixels above the box's first corner.
        text_origin = (p1[0] - 5, p1[1])
        cv2.putText(img, label, text_origin[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
# Evaluation-time pre-processing.
def preprocess_for_eval(image, labels, bboxes, out_shape=EVAL_SIZE, data_format='NHWC', difficults=None, resize=Resize.WARP_RESIZE, scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is converted to float, whitened with the module-level channel
    means and resized according to `resize`. A full-image box [0, 0, 1, 1]
    is prepended to `bboxes` so that it goes through the same crop/pad
    transforms, and is split off again and returned as `bbox_img`.

    Args:
        image: A `Tensor` representing an image of arbitrary size; it must
            have exactly 3 dimensions.
        labels: Labels matching `bboxes`; only touched when `difficults` is
            given.
        bboxes: Bounding boxes, or None when there are none.
        out_shape: Output shape after pre-processing (if resize != None)
        data_format: 'NHWC' or 'NCHW'; for 'NCHW' the image is transposed to
            channels-first.
        difficults: Optional per-box flags; boxes (and their labels) whose
            flag is true are removed.
        resize: Resize strategy.
        scope: Name scope under which the ops are created.

    Returns:
        Tuple `(image, labels, bboxes, bbox_img)`: the preprocessed image,
        the possibly filtered labels and boxes, and the transformed
        full-image box.

    Raises:
        ValueError: If `image` does not have 3 dimensions.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            # The factor is capped at 1.0, so the image is never enlarged
            # by this step.
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)
            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image. The boxes are left untouched in
            # this branch.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes: row 0 is the full-image box added above.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
# ------------------------------------------ Main program ------------------------------
cap = cv2.VideoCapture(r'/home/chenyu/Documents/SSD-Tensorflow-master/candou.dav')
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fourcc = cap.get(cv2.CAP_PROP_FOURCC)
print('fps=%d,size=%r,fourcc=%r' % (fps, size, fourcc))

# Per-frame display delay in milliseconds (keeps the playback from stalling).
# Two guards on top of the original `30 / int(fps)`:
#   * the reported FPS may truncate to 0, which used to raise
#     ZeroDivisionError;
#   * the delay is at least 1 ms, because cv2.waitKey(0) waits for a key
#     press indefinitely, which froze playback whenever fps > 30.
whole_fps = int(fps)
delay = max(1, int(30 / whole_fps)) if whole_fps > 0 else 1

i = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure.
            break
        rclasses, rscores, rbboxes = process_image(frame)
        visualization_camera.bboxes_draw_on_img(frame, rclasses, rscores, rbboxes)
        cv2.imshow('frame', frame)
        i += 1
        cv2.waitKey(delay)
        print(i, 'Ongoing...')
finally:
    # Release the capture and close the window even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()