1_Object Detection with an Off-the-Shelf RetinaNet on TensorFlow 2

Object Detection using RetinaNet

RetinaNet is an object detector introduced in the paper Focal Loss for Dense Object Detection by Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár.

Below we run inference with a model pretrained by the Keras-based implementation; the training set was the COCO object detection dataset.

# The official release is incompatible with TensorFlow 2, so install from a compatible branch
%pip install -q git+https://github.com/fizyr/keras-retinanet
# Alternatively, install from a local clone:
# git clone https://github.com/fizyr/keras-retinanet
# pip install .
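Before going further, it is worth confirming that the environment really is a TensorFlow 2 one; here is a minimal sanity-check sketch (the assertion message is ours, not output from the original run):

import tensorflow as tf

# The fork above targets TF2; fail fast on an older runtime.
print("TensorFlow:", tf.__version__)
assert tf.__version__.startswith("2."), "expected a TensorFlow 2.x environment"

# The import itself is the real compatibility test: it fails on a bad install.
from keras_retinanet import models  # noqa: F401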

Load the pretrained model

# All pretrained models can be downloaded from: https://github.com/fizyr/keras-retinanet/releases
from urllib.request import urlretrieve
import os

PRETRAINED_MODEL_NAME = "resnet50_coco_best_v2.1.0.h5"
BACKBONE_NAME = 'resnet50'
PRETRAINED_BASE_URL = (
    "https://github.com/fizyr/keras-retinanet/"
    "releases/download/0.5.1/")

if not os.path.exists(PRETRAINED_MODEL_NAME):
    model_url = PRETRAINED_BASE_URL + PRETRAINED_MODEL_NAME
    print(f"Downloading {model_url}...")
    urlretrieve(model_url, PRETRAINED_MODEL_NAME)
    print("done.")
Downloading https://github.com/fizyr/keras-retinanet/releases/download/0.5.1/resnet50_coco_best_v2.1.0.h5...
done.
ls -lh *.h5
-rw-rw-r-- 1 tianliang tianliang 146M Feb  5 23:38 resnet50_coco_best_v2.1.0.h5
from keras_retinanet import models

# Load the RetinaNet model; while building it, keras-retinanet logs the
# anchor boxes it registers for each of the five FPN pyramid levels (P3-P7)
model = models.load_model(PRETRAINED_MODEL_NAME, backbone_name=BACKBONE_NAME)
Using TensorFlow backend.


tracking <tf.Variable 'Variable:0' shape=(9, 4) dtype=float32, numpy=
array([[-22.627417, -11.313708,  22.627417,  11.313708],
       [-28.50876 , -14.25438 ,  28.50876 ,  14.25438 ],
       [-35.918785, -17.959393,  35.918785,  17.959393],
       [-16.      , -16.      ,  16.      ,  16.      ],
       [-20.158737, -20.158737,  20.158737,  20.158737],
       [-25.398417, -25.398417,  25.398417,  25.398417],
       [-11.313708, -22.627417,  11.313708,  22.627417],
       [-14.25438 , -28.50876 ,  14.25438 ,  28.50876 ],
       [-17.959393, -35.918785,  17.959393,  35.918785]], dtype=float32)> anchors
tracking <tf.Variable 'Variable:0' shape=(9, 4) dtype=float32, numpy=
array([[-45.254833, -22.627417,  45.254833,  22.627417],
       [-57.01752 , -28.50876 ,  57.01752 ,  28.50876 ],
       [-71.83757 , -35.918785,  71.83757 ,  35.918785],
       [-32.      , -32.      ,  32.      ,  32.      ],
       [-40.317474, -40.317474,  40.317474,  40.317474],
       [-50.796833, -50.796833,  50.796833,  50.796833],
       [-22.627417, -45.254833,  22.627417,  45.254833],
       [-28.50876 , -57.01752 ,  28.50876 ,  57.01752 ],
       [-35.918785, -71.83757 ,  35.918785,  71.83757 ]], dtype=float32)> anchors
tracking <tf.Variable 'Variable:0' shape=(9, 4) dtype=float32, numpy=
array([[ -90.50967 ,  -45.254833,   90.50967 ,   45.254833],
       [-114.03504 ,  -57.01752 ,  114.03504 ,   57.01752 ],
       [-143.67514 ,  -71.83757 ,  143.67514 ,   71.83757 ],
       [ -64.      ,  -64.      ,   64.      ,   64.      ],
       [ -80.63495 ,  -80.63495 ,   80.63495 ,   80.63495 ],
       [-101.593666, -101.593666,  101.593666,  101.593666],
       [ -45.254833,  -90.50967 ,   45.254833,   90.50967 ],
       [ -57.01752 , -114.03504 ,   57.01752 ,  114.03504 ],
       [ -71.83757 , -143.67514 ,   71.83757 ,  143.67514 ]],
      dtype=float32)> anchors
tracking <tf.Variable 'Variable:0' shape=(9, 4) dtype=float32, numpy=
array([[-181.01933,  -90.50967,  181.01933,   90.50967],
       [-228.07008, -114.03504,  228.07008,  114.03504],
       [-287.35028, -143.67514,  287.35028,  143.67514],
       [-128.     , -128.     ,  128.     ,  128.     ],
       [-161.2699 , -161.2699 ,  161.2699 ,  161.2699 ],
       [-203.18733, -203.18733,  203.18733,  203.18733],
       [ -90.50967, -181.01933,   90.50967,  181.01933],
       [-114.03504, -228.07008,  114.03504,  228.07008],
       [-143.67514, -287.35028,  143.67514,  287.35028]], dtype=float32)> anchors
tracking <tf.Variable 'Variable:0' shape=(9, 4) dtype=float32, numpy=
array([[-362.03867, -181.01933,  362.03867,  181.01933],
       [-456.14017, -228.07008,  456.14017,  228.07008],
       [-574.70056, -287.35028,  574.70056,  287.35028],
       [-256.     , -256.     ,  256.     ,  256.     ],
       [-322.5398 , -322.5398 ,  322.5398 ,  322.5398 ],
       [-406.37466, -406.37466,  406.37466,  406.37466],
       [-181.01933, -362.03867,  181.01933,  362.03867],
       [-228.07008, -456.14017,  228.07008,  456.14017],
       [-287.35028, -574.70056,  287.35028,  574.70056]], dtype=float32)> anchors


/home/tianliang/miniconda3/envs/dl/lib/python3.6/site-packages/keras/engine/saving.py:341: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.
  warnings.warn('No training configuration found in save file: '
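Each logged variable above holds the 9 reference anchors (3 aspect ratios x 3 scales) for one feature-pyramid level, P3 through P7, doubling in size from level to level. The numbers can be reproduced from the library's default anchor parameters; a minimal sketch, assuming those defaults:

import numpy as np

sizes = [32, 64, 128, 256, 512]                # base anchor size per level P3..P7
ratios = [0.5, 1.0, 2.0]                       # height/width aspect ratios
scales = [2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]  # sub-octave scales

for size in sizes:
    anchors = []
    for ratio in ratios:
        for scale in scales:
            area = (size * scale) ** 2         # area is fixed for a given scale
            w = (area / ratio) ** 0.5
            h = w * ratio
            anchors.append([-w / 2, -h / 2, w / 2, h / 2])
    print(np.round(anchors, 2))                # matches the arrays logged above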
#print(model.summary())
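Note that the *best*.h5 files on the releases page are already inference models, with the layers that decode box regressions and run non-maximum suppression attached. If you start from a training checkpoint instead, keras-retinanet provides a converter; a minimal sketch (the checkpoint filename is hypothetical, and this step is not needed for the file downloaded above):

# Only for *training* checkpoints: attach box decoding + NMS for inference.
training_model = models.load_model('my_training_checkpoint.h5',  # hypothetical file
                                   backbone_name=BACKBONE_NAME)
inference_model = models.convert_model(training_model)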

Detect objects in a test image (classes and locations)

# mapping from COCO label indices to class names
labels_to_names = {
    0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
    5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
    10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
    14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
    20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
    25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
    30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
    34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard',
    37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass',
    41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
    46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli',
    51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake',
    56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed',
    60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse',
    65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave',
    69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book',
    74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
    78: 'hair drier', 79: 'toothbrush'}
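The model returns integer label indices, so this dictionary is all that is needed at inference time; a trivial check, plus an inverse mapping that can be handy for filtering detections by class name:

assert len(labels_to_names) == 80              # the 80 COCO object categories
names_to_labels = {name: label for label, name in labels_to_names.items()}
print(names_to_labels['person'])               # 0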

from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color
import matplotlib.pyplot as plt
import time
import cv2
import numpy as np


def detect_and_visualize(image_bgr):
    # copy to draw on
    draw = image_bgr.copy()
    draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

    # preprocess image for network
    image_input = image_bgr.copy()
    image_input = preprocess_image(image_input)
    image_input, scale = resize_image(image_input)
    print(f"shape: {image_input.shape}, dtype: {image_input.dtype}, "
          f"range: {(image_input.min(), image.max())}")

    # process image
    start = time.time()
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image_input, axis=0))
    print(f"processing time: {time.time() - start:.1f}s")

    # correct for image scale
    boxes /= scale

    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break
        if score < 0.4:
            break

        color = label_color(label)

        b = box.astype(int)
        draw_box(draw, b, color=color)

        caption = "{} {:.3f}".format(labels_to_names[label], score)
        draw_caption(draw, b, caption)

    plt.figure(figsize=(8, 8))
    plt.axis('off')
    plt.imshow(draw)
    

# load image
image = read_image_bgr('test.png')
detect_and_visualize(image)
shape: (749, 1333, 3), dtype: float32, range: (-123.67999, 255)
processing time: 0.1s

(Figure: RetinaNet detections drawn on the test image)
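The printed range also shows what preprocess_image does: keras-retinanet uses Caffe-style preprocessing, subtracting the per-channel ImageNet means (103.939, 116.779, 123.68 in BGR order), which is where the minimum near -123.68 comes from. When you want the raw detections rather than a plot, the same pipeline can return plain data; a minimal sketch of a hypothetical detect helper (same calls as above, just without matplotlib):

def detect(image_bgr, score_threshold=0.4):
    """Hypothetical helper: return [(class_name, score, (x1, y1, x2, y2)), ...]."""
    image_input, scale = resize_image(preprocess_image(image_bgr.copy()))
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image_input, axis=0))
    boxes /= scale                        # back to original image coordinates
    results = []
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        if score < score_threshold:       # scores are sorted; stop early
            break
        results.append(
            (labels_to_names[label], float(score), tuple(box.astype(int))))
    return results

print(detect(read_image_bgr('test.png')))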

Using the laptop camera

Since this was run on a server with no camera attached, the code below is provided for reference only.

import cv2
from keras_retinanet.utils.image import read_image_bgr
import matplotlib.pyplot as plt

def camera_grab(camera_id=0, fallback_filename='test.png'):
    camera = cv2.VideoCapture(camera_id)
    try:
        # Take 10 consecutive shots to let the camera auto-adjust;
        # hopefully the last frame comes out well
        for i in range(10):
            snapshot_ok, image = camera.read()
        if not snapshot_ok:
            print("WARNING: could not access camera")
            if fallback_filename:
                image = read_image_bgr(fallback_filename)
    finally:
        camera.release()
    return image
image = camera_grab(camera_id=0)
plt.figure(figsize=(8, 8))
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off');
WARNING: could not access camera

(Figure: the grabbed frame; the fallback test image, since no camera was available)

detect_and_visualize(image)
shape: (749, 1333, 3), dtype: float32, range: (-123.67999, 255)
processing time: 0.1s

(Figure: RetinaNet detections on the grabbed frame)
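On a machine that does have a webcam, the same model can run in a simple live loop; a minimal sketch (hypothetical: it needs a GUI session for cv2.imshow, and the 0.4 threshold mirrors the one used above; press q to quit):

camera = cv2.VideoCapture(0)
try:
    while True:
        ok, frame = camera.read()
        if not ok:
            break
        # Same preprocessing as in detect_and_visualize.
        image_input, scale = resize_image(preprocess_image(frame.copy()))
        boxes, scores, labels = model.predict_on_batch(
            np.expand_dims(image_input, axis=0))
        boxes /= scale
        for box, score, label in zip(boxes[0], scores[0], labels[0]):
            if score < 0.4:               # scores are sorted; stop early
                break
            b = box.astype(int)
            draw_box(frame, b, color=label_color(label))
            draw_caption(frame, b, f"{labels_to_names[label]} {score:.3f}")
        cv2.imshow('RetinaNet', frame)    # frame is BGR, which imshow expects
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    camera.release()
    cv2.destroyAllWindows()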
