webcam
from data import COCODetection, get_label_map, MEANS, COLORS
from yolact import Yolact
from utils.augmentations import BaseTransform, FastBaseTransform, Resize
from layers.box_utils import jaccard, center_size, mask_iou
from utils import timer
from layers.output_utils import postprocess, undo_image_transformation
from data import cfg, set_cfg, set_dataset
import torch
import torch.backends.cudnn as cudnn
import argparse
import random
from collections import defaultdict
import cv2
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args(argv=None):
parser = argparse.ArgumentParser(
description='YOLACT COCO Evaluation')
parser.add_argument('--trained_model',
default='weights/ssd300_mAP_77.43_v2.pth', type=str,
help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.')
parser.add_argument('--top_k', default=15, type=int,
help='Further restrict the number of predictions to parse')
parser.add_argument('--cuda', default=True, type=str2bool,
help='Use cuda to evaulate model')
parser.add_argument('--fast_nms', default=True, type=str2bool,
help='Whether to use a faster, but not entirely correct version of NMS.')
parser.add_argument('--cross_class_nms', default=False, type=str2bool,
help='Whether compute NMS cross-class or per-class.')
parser.add_argument('--display_masks', default=True, type=str2bool,
help='Whether or not to display masks over bounding boxes')
parser.add_argument('--display_bboxes', default=True, type=str2bool,
help='Whether or not to display bboxes around masks')
parser.add_argument('--display_text', default=True, type=str2bool,
help='Whether or not to display text (class [score])')
parser.add_argument('--display_scores', default=True, type=str2bool,
help='Whether or not to display scores in addition to classes')
parser.add_argument('--display', dest='display', action='store_true',
help='Display qualitative results instead of quantitative ones.')
parser.add_argument('--shuffle', dest='shuffle', action='store_true',
help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.')
parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str,
help='In quantitative mode, the file to save detections before calculating mAP.')
parser.add_argument('--resume', dest='resume', action='store_true',
help='If display not set, this resumes mAP calculations from the ap_data_file.')
parser.add_argument('--max_images', default=-1, type=int,
help='The maximum number of images from the dataset to consider. Use -1 for all.')
parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true',
help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.')
parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str,
help='The output file for coco bbox results if --coco_results is set.')
parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str,
help='The output file for coco mask results if --coco_results is set.')
parser.add_argument('--config', default=None,
help='The config object to use.')
parser.add_argument('--output_web_json', dest='output_web_json', action='store_true',
help='If display is not set, instead of processing IoU values, this dumps detections for usage with the detections viewer web thingy.')
parser.add_argument('--web_det_path', default='web/dets/', type=str,
help='If output_web_json is set, this is the path to dump detections into.')
parser.add_argument('--no_bar', dest='no_bar', action='store_true',
help='Do not output the status bar. This is useful for when piping to a file.')
parser.add_argument('--display_lincomb', default=False, type=str2bool,
help='If the config uses lincomb masks, output a visualization of how those masks are created.')
parser.add_argument('--benchmark', default=False, dest='benchmark', action='store_true',
help='Equivalent to running display mode but without displaying an image.')
parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true',
help='Do not sort images by hashed image ID.')
parser.add_argument('--seed', default=None, type=int,
help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.')
parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true',
help='Outputs stuff for scripts/compute_mask.py.')
parser.add_argument('--no_crop', default=False, dest='crop', action='store_false',
help='Do not crop output masks with the predicted bounding box.')
parser.add_argument('--image', default=None, type=str,
help='A path to an image to use for display.')
parser.add_argument('--score_threshold', default=0.15, type=float,
help='Detections with a score under this threshold will not be considered. This currently only works in display mode.')
parser.add_argument('--dataset', default=None, type=str,
help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).')
parser.add_argument('--detect', default=False, dest='detect', action='store_true',
help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.')
parser.add_argument('--display_fps', default=True, dest='display_fps', action='store_true',
help='When displaying / saving video, draw the FPS on the frame')
parser.add_argument('--emulate_playback', default=False, dest='emulate_playback', action='store_true',
help='When saving a video, emulate the framerate that you\'d get running in real-time mode.')
parser.set_defaults(no_bar=False, display=False, resume=False, output_coco_json=False, output_web_json=False, shuffle=False,
benchmark=False, no_sort=False, no_hash=False, mask_proto_debug=False, crop=True, detect=False, display_fps=False,
emulate_playback=False)
global args
args = parser.parse_args(argv)
if args.output_web_json:
args.output_coco_json = True
if args.seed is not None:
random.seed(args.seed)
color_cache = defaultdict(lambda: {})
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
"""
Note: If undo_transform=False then im_h and im_w are allowed to be None.
"""
if undo_transform:
img_numpy = undo_image_transformation(img, w, h)
img_gpu = torch.Tensor(img_numpy).cuda()
else:
img_gpu = img / 255.0
h, w, _ = img.shape
with timer.env('Postprocess'):
save = cfg.rescore_bbox
cfg.rescore_bbox = True
t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
crop_masks = args.crop,
score_threshold = args.score_threshold)
cfg.rescore_bbox = save
with timer.env('Copy'):
idx = t[1].argsort(0, descending=True)[:args.top_k]
if cfg.eval_mask_branch:
# Masks are drawn on the GPU, so don't copy
masks = t[3][idx]
classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
num_dets_to_consider = min(args.top_k, classes.shape[0])
for j in range(num_dets_to_consider):
if scores[j] < args.score_threshold:
num_dets_to_consider = j
break
# Quick and dirty lambda for selecting the color for a particular index
# Also keeps track of a per-gpu color cache for maximum speed
def get_color(j, on_gpu=None):
global color_cache
color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
if on_gpu is not None and color_idx in color_cache[on_gpu]:
return color_cache[on_gpu][color_idx]
else:
color = COLORS[color_idx]
if not undo_transform:
# The image might come in as RGB or BRG, depending
color = (color[2], color[1], color[0])
if on_gpu is not None:
color = torch.Tensor(color).to(on_gpu).float() / 255.
color_cache[on_gpu][color_idx] = color
return color
# First, draw the masks on the GPU where we can do it really fast
# Beware: very fast but possibly unintelligible mask-drawing code ahead
# I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
# After this, mask is of size [num_dets, h, w, 1]
masks = masks[:num_dets_to_consider, :, :, None]
# Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
# This is 1 everywhere except for 1-mask_alpha where the mask is
inv_alph_masks = masks * (-mask_alpha) + 1
# I did the math for this on pen and paper. This whole block should be equivalent to:
# for j in range(num_dets_to_consider):
# img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
masks_color_summand = masks_color[0]
if num_dets_to_consider > 1:
inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
masks_color_cumul = masks_color[1:] * inv_alph_cumul
masks_color_summand += masks_color_cumul.sum(dim=0)
img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
# Then draw the stuff that needs to be done on the cpu
# Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
img_numpy = (img_gpu * 255).byte().cpu().numpy()
if num_dets_to_consider == 0:
return img_numpy
if args.display_text or args.display_bboxes:
for j in reversed(range(num_dets_to_consider)):
x1, y1, x2, y2 = boxes[j, :]
color = get_color(j)
score = scores[j]
if args.display_bboxes:
cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
if args.display_text:
_class = cfg.dataset.class_names[classes[j]]
text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
font_face = cv2.FONT_HERSHEY_DUPLEX
font_scale = 0.6
font_thickness = 1
text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
text_pt = (x1, y1 - 3)
text_color = [255, 255, 255]
cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
return img_numpy
def evaluate(net:Yolact, dataset, train_mode=False):
cap = cv2.VideoCapture(0)
import time
while True:
start = time.time()
frame = cap.read()[1]
frame = torch.from_numpy(frame).cuda().float()
batch = FastBaseTransform()(frame.unsqueeze(0))
preds = net(batch)
img_numpy = prep_display(preds, frame, None, None, undo_transform=False)
img_numpy = img_numpy[:, :, (2, 1, 0)]
cv2.imshow("test", img_numpy)
cv2.waitKey(1)
end = time.time()
print("\r FPS: ", 1/ (end - start), end='')
if __name__ == '__main__':
parse_args()
set_cfg("yolact_resnet50_config")
if torch.cuda.is_available():
print("cuda is avaliable!")
else:
print("cuda isn't available!")
if args.dataset is not None:
set_dataset(args.dataset)
with torch.no_grad():
if args.cuda:
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
torch.set_default_tensor_type('torch.FloatTensor')
dataset = None
print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')
if args.cuda:
net = net.cuda()
net.detect.use_fast_nms = args.fast_nms
net.detect.use_cross_class_nms = args.cross_class_nms
cfg.mask_proto_debug = args.mask_proto_debug
evaluate(net, dataset)
ros
#!/usr/bin/env python
from data import COCODetection, get_label_map, MEANS, COLORS
from yolact import Yolact
from utils.augmentations import BaseTransform, FastBaseTransform, Resize
from layers.box_utils import jaccard, center_size, mask_iou
from utils import timer
from layers.output_utils import postprocess, undo_image_transformation
from data import cfg, set_cfg, set_dataset
import torch
import torch.backends.cudnn as cudnn
import argparse
import random
from collections import defaultdict
import cv2
import rospy
import ros_numpy
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args(argv=None):
parser = argparse.ArgumentParser(
description='YOLACT COCO Evaluation')
parser.add_argument('--trained_model',
default='/home/lzf/ros_ws/src/yolact_ros/scripts/yolact_resnet50_54_800000.pth', type=str,
help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.')
parser.add_argument('--top_k', default=15, type=int,
help='Further restrict the number of predictions to parse')
parser.add_argument('--cuda', default=True, type=str2bool,
help='Use cuda to evaulate model')
parser.add_argument('--fast_nms', default=True, type=str2bool,
help='Whether to use a faster, but not entirely correct version of NMS.')
parser.add_argument('--cross_class_nms', default=False, type=str2bool,
help='Whether compute NMS cross-class or per-class.')
parser.add_argument('--display_masks', default=True, type=str2bool,
help='Whether or not to display masks over bounding boxes')
parser.add_argument('--display_bboxes', default=True, type=str2bool,
help='Whether or not to display bboxes around masks')
parser.add_argument('--display_text', default=True, type=str2bool,
help='Whether or not to display text (class [score])')
parser.add_argument('--display_scores', default=True, type=str2bool,
help='Whether or not to display scores in addition to classes')
parser.add_argument('--display', dest='display', action='store_true',
help='Display qualitative results instead of quantitative ones.')
parser.add_argument('--shuffle', dest='shuffle', action='store_true',
help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.')
parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str,
help='In quantitative mode, the file to save detections before calculating mAP.')
parser.add_argument('--resume', dest='resume', action='store_true',
help='If display not set, this resumes mAP calculations from the ap_data_file.')
parser.add_argument('--max_images', default=-1, type=int,
help='The maximum number of images from the dataset to consider. Use -1 for all.')
parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true',
help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.')
parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str,
help='The output file for coco bbox results if --coco_results is set.')
parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str,
help='The output file for coco mask results if --coco_results is set.')
parser.add_argument('--config', default=None,
help='The config object to use.')
parser.add_argument('--output_web_json', dest='output_web_json', action='store_true',
help='If display is not set, instead of processing IoU values, this dumps detections for usage with the detections viewer web thingy.')
parser.add_argument('--web_det_path', default='web/dets/', type=str,
help='If output_web_json is set, this is the path to dump detections into.')
parser.add_argument('--no_bar', dest='no_bar', action='store_true',
help='Do not output the status bar. This is useful for when piping to a file.')
parser.add_argument('--display_lincomb', default=False, type=str2bool,
help='If the config uses lincomb masks, output a visualization of how those masks are created.')
parser.add_argument('--benchmark', default=False, dest='benchmark', action='store_true',
help='Equivalent to running display mode but without displaying an image.')
parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true',
help='Do not sort images by hashed image ID.')
parser.add_argument('--seed', default=None, type=int,
help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.')
parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true',
help='Outputs stuff for scripts/compute_mask.py.')
parser.add_argument('--no_crop', default=False, dest='crop', action='store_false',
help='Do not crop output masks with the predicted bounding box.')
parser.add_argument('--image', default=None, type=str,
help='A path to an image to use for display.')
parser.add_argument('--score_threshold', default=0.15, type=float,
help='Detections with a score under this threshold will not be considered. This currently only works in display mode.')
parser.add_argument('--dataset', default=None, type=str,
help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).')
parser.add_argument('--detect', default=False, dest='detect', action='store_true',
help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.')
parser.add_argument('--display_fps', default=True, dest='display_fps', action='store_true',
help='When displaying / saving video, draw the FPS on the frame')
parser.add_argument('--emulate_playback', default=False, dest='emulate_playback', action='store_true',
help='When saving a video, emulate the framerate that you\'d get running in real-time mode.')
parser.set_defaults(no_bar=False, display=False, resume=False, output_coco_json=False, output_web_json=False, shuffle=False,
benchmark=False, no_sort=False, no_hash=False, mask_proto_debug=False, crop=True, detect=False, display_fps=False,
emulate_playback=False)
global args
args = parser.parse_args(argv)
if args.output_web_json:
args.output_coco_json = True
if args.seed is not None:
random.seed(args.seed)
color_cache = defaultdict(lambda: {})
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
"""
Note: If undo_transform=False then im_h and im_w are allowed to be None.
"""
if undo_transform:
img_numpy = undo_image_transformation(img, w, h)
img_gpu = torch.Tensor(img_numpy).cuda()
else:
img_gpu = img / 255.0
h, w, _ = img.shape
with timer.env('Postprocess'):
save = cfg.rescore_bbox
cfg.rescore_bbox = True
t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
crop_masks = args.crop,
score_threshold = args.score_threshold)
cfg.rescore_bbox = save
with timer.env('Copy'):
idx = t[1].argsort(0, descending=True)[:args.top_k]
if cfg.eval_mask_branch:
# Masks are drawn on the GPU, so don't copy
masks = t[3][idx]
classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
num_dets_to_consider = min(args.top_k, classes.shape[0])
for j in range(num_dets_to_consider):
if scores[j] < args.score_threshold:
num_dets_to_consider = j
break
# Quick and dirty lambda for selecting the color for a particular index
# Also keeps track of a per-gpu color cache for maximum speed
def get_color(j, on_gpu=None):
global color_cache
color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
if on_gpu is not None and color_idx in color_cache[on_gpu]:
return color_cache[on_gpu][color_idx]
else:
color = COLORS[color_idx]
if not undo_transform:
# The image might come in as RGB or BRG, depending
color = (color[2], color[1], color[0])
if on_gpu is not None:
color = torch.Tensor(color).to(on_gpu).float() / 255.
color_cache[on_gpu][color_idx] = color
return color
# First, draw the masks on the GPU where we can do it really fast
# Beware: very fast but possibly unintelligible mask-drawing code ahead
# I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
# After this, mask is of size [num_dets, h, w, 1]
masks = masks[:num_dets_to_consider, :, :, None]
# Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
# This is 1 everywhere except for 1-mask_alpha where the mask is
inv_alph_masks = masks * (-mask_alpha) + 1
# I did the math for this on pen and paper. This whole block should be equivalent to:
# for j in range(num_dets_to_consider):
# img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
masks_color_summand = masks_color[0]
if num_dets_to_consider > 1:
inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
masks_color_cumul = masks_color[1:] * inv_alph_cumul
masks_color_summand += masks_color_cumul.sum(dim=0)
img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
# Then draw the stuff that needs to be done on the cpu
# Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
img_numpy = (img_gpu * 255).byte().cpu().numpy()
if num_dets_to_consider == 0:
return img_numpy
if args.display_text or args.display_bboxes:
for j in reversed(range(num_dets_to_consider)):
x1, y1, x2, y2 = boxes[j, :]
color = get_color(j)
score = scores[j]
if args.display_bboxes:
cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
if args.display_text:
_class = cfg.dataset.class_names[classes[j]]
text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
font_face = cv2.FONT_HERSHEY_DUPLEX
font_scale = 0.6
font_thickness = 1
text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
text_pt = (x1, y1 - 3)
text_color = [255, 255, 255]
cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
return img_numpy
def evaluate(net:Yolact, dataset, train_mode=False):
import time
from sensor_msgs.msg import Image
while True:
msg = rospy.wait_for_message("/camera/rgb/image_rect_color", Image)
frame = ros_numpy.numpify(msg)
start = time.time()
frame = torch.from_numpy(frame).cuda().float()
batch = FastBaseTransform()(frame.unsqueeze(0))
preds = net(batch)
img_numpy = prep_display(preds, frame, None, None, undo_transform=False)
img_numpy = img_numpy[:, :, (2, 1, 0)]
cv2.imshow("test", img_numpy)
cv2.waitKey(1)
end = time.time()
print("\r FPS: ", 1/ (end - start), end='')
if __name__ == '__main__':
parse_args()
rospy.init_node("yolact_ros")
set_cfg("yolact_resnet50_config")
if torch.cuda.is_available():
print("cuda is avaliable!")
else:
print("cuda isn't available!")
if args.dataset is not None:
set_dataset(args.dataset)
with torch.no_grad():
if args.cuda:
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
torch.set_default_tensor_type('torch.FloatTensor')
dataset = None
print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')
if args.cuda:
net = net.cuda()
net.detect.use_fast_nms = args.fast_nms
net.detect.use_cross_class_nms = args.cross_class_nms
cfg.mask_proto_debug = args.mask_proto_debug
evaluate(net, dataset)