使用TensorFlow Object Detection API进行红绿灯检测

最新推荐文章于 2024-04-23 11:28:33 发布

suiyuan2009

最新推荐文章于 2024-04-23 11:28:33 发布

阅读量810

点赞数

分类专栏： # TensorFlow Object Detection 文章标签： tensorflow 深度学习

本文链接：https://blog.csdn.net/suiyuan2009/article/details/105620114

版权

TensorFlow Object Detection 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

项目目录结构

本文中未明确说明的情况下，所使用的路径均在./research目录下。

research
- object_detection
- datasets
  - my_traffic_light (参照Pascal VOC目录结构)
    - Annotations
    - ImageSets
    - JPEGImages
    - SegmentationClass
    - SegmentationObject
    - tfrecord
      ***.tfrecord
      ***.pbtxt
- ssd_traffic_light_detection
  - ssd_traffic_light_detection_model
    - saved_model
      - variables
        saved_model.pb
        pipeline.config
        model.ckpt.meta / index / data-00000-of-00001
        frozen_inference_graph.pb
        checkpoint
  - train (主要存放用于启动训练的一些文件，和训练中间文件)
    - export
    - eval_0
      train_cmd.sh (存放一些会用到的训练命令等)
      model.ckpt-*****.meta
      model.ckpt-*****.index
      model.ckpt-*****.data-00000-of-00001
      graph.pbtxt
      events.out.tfevents.*****
      model_name_datasets.config
      pipeline.config
      checkpoint

数据集制作

图像采集

使用华为手机拍摄视频，存为*.mp4文件。

提取图像

extract_images_from_video
测试读取视频文件，查看文件的FPS/H/W和总帧数。

import cv2
import os
# Probe one recorded clip: make sure its output folder exists, then report
# the frame rate, frame size and frame count that OpenCV reads from it.
video_path = './JPEGImages/VID_20200419_122755.mp4'
output_dir = './JPEGImages/VID_20200419_122755'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

cap = cv2.VideoCapture(video_path)
try:
    # Reading a single frame checks that the file can actually be decoded;
    # `success` is False when no frame could be read.
    success, frame = cap.read()
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    h_frame = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    w_frame = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
finally:
    # Release the capture even if a property query raises.
    cap.release()
print('The video propertities is: fps={}, height={}, width={}, and has {} frames.'.format(fps, h_frame, w_frame, n_frame))

提取图片到视频文件夹下，提取的图片存放到以视频文件名为名的文件夹下。

def extract_images_from_video(video_path, frame_interval=45):
    """Save every `frame_interval`-th frame of a video as a rotated JPEG.

    The images are written to a folder that sits next to the video and is
    named after the video file without its extension. Each saved frame is
    rotated 90 degrees clockwise before it is written and is named
    `<video name>_<5-digit running number>.jpg`.

    Args:
        video_path: Path of the video file to read.
        frame_interval: Keep one frame out of this many (default 45, the
            value that used to be hard-coded).

    Raises:
        ValueError: If `frame_interval` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError(
            "frame_interval must be >= 1, got {}".format(frame_interval))

    # splitext removes only the final extension, so a name such as
    # "clip.v2.mp4" keeps its "clip.v2" stem instead of collapsing to "clip".
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(os.path.dirname(video_path), video_name)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    cameraCapture = cv2.VideoCapture(video_path)
    try:
        idx = 0      # frames read so far
        n_sels = 0   # frames saved so far; used for the file number
        success, frame = cameraCapture.read()
        while success:
            idx += 1
            if idx % frame_interval == 0:  # keep one frame per interval
                n_sels += 1
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
                frame_name = "{0}_{1:0>5d}.jpg".format(video_name, n_sels)
                frame_saved_path = os.path.join(output_dir, frame_name)
                cv2.imwrite(frame_saved_path, frame)
            success, frame = cameraCapture.read()
    finally:
        # Release the capture even when reading or writing raises.
        cameraCapture.release()
    print("Finished extract images from {}".format(video_name))

import glob
# Extract frames from every clip whose file name matches the pattern below.
video_files = "./JPEGImages/VID_20200419_*.mp4"
# glob returns matches in arbitrary order; sorting keeps the processing and
# log order the same from run to run.
video_filepaths = sorted(glob.glob(video_files))
print(video_filepaths)
for path in video_filepaths:
    extract_images_from_video(path)

图像标注

训练

模型导出

进行推理

推理文件

导入包

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("../../")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')

# This is needed to display the images.
%matplotlib inline
from utils import label_map_util
from utils import visualization_utils as vis_util

# Name given to the custom traffic-light model.
MODEL_NAME = 'my_traffic_light_detection'
# Folder that contains the exported model files.
MODEL_DIR = './model'
# The download settings below are kept only as commented-out lines;
# the model is read from MODEL_DIR instead.
# MODEL_FILE = MODEL_NAME + '.tar.gz'
# DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Frozen detection graph: the model file that is loaded for inference.
PATH_TO_FROZEN_GRAPH = '{}/frozen_inference_graph.pb'.format(MODEL_DIR)

# Label map used to attach a label to each detected box.
PATH_TO_LABELS = os.path.join(
    './dataset',
    'traffic_light_label_map.pbtxt',
)

导入计算图

# Load the frozen model into its own graph and list the tensors it exposes.
detection_graph = tf.Graph()
with detection_graph.as_default():
  # Read the serialized GraphDef bytes; the file is only needed for this step.
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()

  od_graph_def = tf.GraphDef()
  od_graph_def.ParseFromString(serialized_graph)
  # name='' imports the nodes without adding a prefix to their names.
  tf.import_graph_def(od_graph_def, name='')

  ops = tf.get_default_graph().get_operations()
  all_tensor_names = {
      tensor.name
      for operation in ops
      for tensor in operation.outputs
  }
  print(all_tensor_names)

# Build the category index from the label map file.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True)
print(category_index)

def load_image_into_numpy_array(image):
  """Convert an image object into a (height, width, 3) uint8 array.

  Args:
    image: Image object exposing `mode`, `size` (width, height),
      `getdata()` and `convert()`, as a PIL image does.

  Returns:
    A numpy array of dtype uint8 shaped (height, width, 3).
  """
  # Grayscale, palette or RGBA images do not carry exactly three values per
  # pixel, so the reshape below would fail; convert them to RGB first.
  if image.mode != 'RGB':
    image = image.convert('RGB')
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

import glob
# Folder holding the images to run detection on.
PATH_TO_TEST_IMAGES_DIR = './test_images'
# image0.jpg ... image9.jpg inside that folder.
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(index))
    for index in range(10)
]
# Alternative: take every JPEG in the folder instead of the fixed names.
# TEST_IMAGE_PATHS = glob.glob("./test_images/*.jpg")
print(TEST_IMAGE_PATHS)
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def _collect_output_tensors(image):
  """Return the detection output tensors that exist in the default graph."""
  default_graph = tf.get_default_graph()
  all_tensor_names = {
      output.name
      for op in default_graph.get_operations()
      for output in op.outputs
  }
  tensor_dict = {}
  for key in [
      'num_detections', 'detection_boxes', 'detection_scores',
      'detection_classes', 'detection_masks'
  ]:
    tensor_name = key + ':0'
    if tensor_name in all_tensor_names:
      tensor_dict[key] = default_graph.get_tensor_by_name(tensor_name)
  if 'detection_masks' in tensor_dict:
    # The following processing is only for single image
    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
    # Reframe is required to translate mask from box coordinates to image
    # coordinates and fit the image size.
    real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
    detection_boxes = tf.slice(detection_boxes, [0, 0],
                               [real_num_detection, -1])
    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                               [real_num_detection, -1, -1])
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image.shape[0], image.shape[1])
    # Binarize the masks at 0.5.
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    # Follow the convention by adding back the batch dimension
    tensor_dict['detection_masks'] = tf.expand_dims(
        detection_masks_reframed, 0)
  return tensor_dict


def _strip_batch_dimension(output_dict):
  """Keep only the first batch entry of each output and fix integer types."""
  output_dict['num_detections'] = int(output_dict['num_detections'][0])
  # NOTE: the uint8 cast can only represent class ids from 0 to 255.
  output_dict['detection_classes'] = output_dict[
      'detection_classes'][0].astype(np.uint8)
  output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
  output_dict['detection_scores'] = output_dict['detection_scores'][0]
  if 'detection_masks' in output_dict:
    output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict


def _detect_single_image(image, sess):
  """Run the output tensors in `sess` for one image fed as a batch of one."""
  tensor_dict = _collect_output_tensors(image)
  image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
  output_dict = sess.run(tensor_dict,
                         feed_dict={image_tensor: np.expand_dims(image, 0)})
  return _strip_batch_dimension(output_dict)


def run_inference_for_single_image(image, graph, sess=None):
  """Run the detection graph on one image and return its outputs.

  Args:
    image: Image array. A leading batch axis is added before it is fed to
      `image_tensor:0`; `image.shape[0]` and `image.shape[1]` are passed to
      `reframe_box_masks_to_image_masks` when the graph provides masks.
    graph: `tf.Graph` into which the detection model has been imported.
    sess: Optional `tf.Session` to run in. When omitted, a session is
      created and closed inside this call, as before. Passing a session
      that was created on `graph` avoids building a new one per image.

  Returns:
    dict with the first batch entry of each available output:
    'num_detections' (int), 'detection_classes' (uint8 array),
    'detection_boxes', 'detection_scores' and, when the graph has them,
    'detection_masks'.
  """
  with graph.as_default():
    if sess is not None:
      return _detect_single_image(image, sess)
    with tf.Session() as temporary_sess:
      return _detect_single_image(image, temporary_sess)

import cv2
# Run detection on every test image and draw the results onto the image array.
for image_path in TEST_IMAGE_PATHS:
  # The context manager closes the image file once its pixels are copied out.
  with Image.open(image_path) as image:
    # The array form of the image is what the boxes and labels are drawn on.
    image_np = load_image_into_numpy_array(image)
  # Actual detection. The batch axis is added inside
  # run_inference_for_single_image, so the plain image array is passed here.
  output_dict = run_inference_for_single_image(image_np, detection_graph)
  print(output_dict)
  # Visualization of the results of a detection.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=4)
  # Convert the annotated array from RGB to BGR channel order.
  image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
  # Display is left disabled. The options that were tried are kept below.
#   cv2.imshow('image',image_np)
#   cv2.waitKey(10)
#   cv2.destroyAllWindows()
#   if cv2.waitKey(1000)&0xff == 113:
# cv2.destroyAllWindows()
#   plt.figure(figsize=IMAGE_SIZE)
#   plt.imshow(image_np)
# plt.show()

suiyuan2009

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
使用TensorFlow Object Detection API进行红绿灯检测

项目目录结构：本文中未明确说明的情况下，所使用的路径均在 ./research 目录下。research：object_detection、datasets、my_traffic_light（参照 Pascal VOC 目录结构）：Annotations、ImageSets、JPEGImages、SegmentationClass、SegmentationObject、tfrecord、***...
复制链接

扫一扫