[Deep Learning] Building an object detection model with the TensorFlow Object Detection API (code walkthrough)

(anaconda + tensorflow-gpu + py36 + CUDA + cuDNN + TensorFlow Object Detection API)

1. Preparing the image data

Search Baidu for photos of Jack Ma (马云). Under the project folder (D:\DeepLearning\my_object_detection), create two new folders, images\test and images\train, to hold the photos picked from the web.

That is:

D:\DeepLearning\my_object_detection\images\test    test_001.jpg ~ test_010.jpg
D:\DeepLearning\my_object_detection\images\train    train_001.jpg ~ train_030.jpg

Download 30 photos into train and 10 into test.

Note!!! The images folder under the project path is one you create yourself; it is best to name it exactly images, because a later script hardcodes that name. I initially named it image, missing the s, and exporting the record files then kept failing with file-not-found errors.
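As a quick sanity check of the layout (a minimal sketch using only the standard library; adjust the project path to your own):

import os

project = r'D:\DeepLearning\my_object_detection'
for sub in (r'images\train', r'images\test'):
    full = os.path.join(project, sub)
    # prints OK if the folder exists, MISSING otherwise
    print(full, '->', 'OK' if os.path.isdir(full) else 'MISSING')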

2. Manual annotation with LabelImg

Installing LabelImg: open the Anaconda Prompt and run pip install labelImg

To run it: open the Anaconda Prompt, type labelImg and press Enter to launch the annotation tool.

Press "W" to draw a bounding box around the target, then save; the annotation should be saved as an XML file.

Once all forty photos are annotated, each image has one XML file corresponding to it.
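For reference, each LabelImg annotation is Pascal VOC XML and looks roughly like this (values illustrative). Note that <bndbox> is the fifth child of <object>, which is why the conversion script below indexes member[4]:

<annotation>
    <folder>train</folder>
    <filename>train_001.jpg</filename>
    <size>
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <object>
        <name>MaYun</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>120</xmin>
            <ymin>80</ymin>
            <xmax>260</xmax>
            <ymax>240</ymax>
        </bndbox>
    </object>
</annotation>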

3. XML to CSV

xml_to_csv.py (the places that need changing are marked in the script; edit them and run):

import os
import glob
import pandas as pd
# pandas is used for data analysis and data mining; it provides many convenience methods that make data handling simple, and is also well optimized for speed
import xml.etree.ElementTree as ET

"""
Parse the xml and load its data by reading the file:
import xml.etree.ElementTree as ET
tree = ET.parse('xxx.xml')
root = tree.getroot()
"""

path = 'D:\\code\\Detection\\images\\test'   # change this to the folder of images/xml files you want to convert
os.chdir(path)    
   
def xml_to_csv(path):
    xml_list = []  # one list collecting a row per annotated object
    # glob.glob finds all pathnames matching a pattern; here, every file ending in .xml
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()  # load the data from the xml file
        # Element.findall() finds only direct children of the current element with the given tag
        # Element.find() finds the first child with the given tag, e.g. <filename> holds test_001.jpg
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),    # bounding-box coordinates (bndbox is the 5th child of <object>)
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    # pd.DataFrame builds the table; keys are the column names, values are the data
    return xml_df
"""
首先查询当前的工作路径
import os 
os.getcwd() #获取当前的工作路径
dt.to_csv('xxx.csv') #相对位置,保存在getwcd()获得的路径下
"""
def main():
    image_path = path
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv('MaYun_test.csv', index=None)  # set the name of the csv file to save
    print('Successfully converted xml to csv.')

main()

After running the script twice (once for test, once for train), two CSV tables are generated: MaYun_test.csv and MaYun_train.csv.

Create a data folder under the project directory, D:\DeepLearning\my_object_detection\data, and put both CSV files into it.
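Each CSV ends up looking roughly like this (one row per annotated object; values illustrative):

filename,width,height,class,xmin,ymin,xmax,ymax
train_001.jpg,500,375,MaYun,120,80,260,240
train_002.jpg,640,480,MaYun,95,60,300,310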

4. CSV to TFRecord

Save generate_tfrecord.py into the project, i.e. D:\DeepLearning\my_object_detection\generate_tfrecord.py.

Note!!! Before running the script, move all the files from images\test and images\train directly into images, otherwise it fails with file-not-found errors. The reason shows up in the code below: create_tf_example joins the images folder with the bare filename from the CSV, so it looks for every image directly under images. In any case, get it running first; that is what keeps you motivated to keep digging.

import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple,OrderedDict

os.chdir('D:/DeepLearning/object_detection')  # change to your own project directory

def class_text_to_int(row_label):
    if row_label == 'MaYun':
        return 1
    else:
        return None

def split(df, group):  # df: the DataFrame read from the csv; group: 'filename'
    data = namedtuple('data', ['filename', 'object'])  # a named tuple holding (filename, matching rows)
    gb = df.groupby(group)  # group rows that share the same filename
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]

# group: one (filename, rows) element produced by split(); path: folder the images are read from
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()    # read the raw image bytes
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')  # encode the filename as utf8
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)       # normalize the box coordinates to [0, 1]
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))  # the class name
        classes.append(class_text_to_int(row['class']))   # the numeric label for the class name

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(csv_input, tfrecord_output):

    # TFRecordWriter writes serialized examples to the given file
    writer = tf.python_io.TFRecordWriter(tfrecord_output)
    path = os.path.join(os.getcwd(), 'images')  # the images folder under the current working directory

    examples = pd.read_csv(csv_input)  # read the csv
    grouped = split(examples, 'filename')  # group the rows by filename
    for group in grouped:
        tf_example = create_tf_example(group, path)  # build one tf.train.Example per image
        writer.write(tf_example.SerializeToString())

    writer.close()
    #output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(tfrecord_output))


if __name__ == '__main__':
    print(os.getcwd())  # print the current working directory
    print(os.path.join(os.getcwd(), 'data'))  # print the data directory under it
    # training set
    main('data/MaYun_train.csv', 'data/MY_train.record')
    # test set
    main('data/MaYun_test.csv', 'data/MY_test.record')
    #tf.app.run()

In the Anaconda Prompt, cd to the project directory and run:

python generate_tfrecord.py

(Note: as written, this version of the script hardcodes the CSV and record paths in __main__ and never parses command-line flags, since tf.app.run() is commented out; the --csv_input/--output_path flags of the upstream generate_tfrecord.py do not apply here.)

data is the folder under the project directory where the two CSV files were saved earlier.

After running, the two record files appear in the data folder.
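As a quick sanity check (a minimal sketch using the TF 1.x API this post assumes), you can count how many examples each record file contains; with the dataset above you would expect 30 and 10:

import tensorflow as tf

for rec in ('data/MY_train.record', 'data/MY_test.record'):
    # iterate the serialized records and count them
    count = sum(1 for _ in tf.python_io.tf_record_iterator(rec))
    print(rec, 'contains', count, 'examples')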

5. Config file and model

Set up the config file. From the Object Detection GitHub repo, choose the config you want to use; here we take ssd_mobilenet_v1_coco.config as the example. Create a training folder under the main project directory, extract ssd_mobilenet_v1_coco_2017_11_17.tar.gz (the matching pretrained model from the TensorFlow detection model zoo), put ssd_mobilenet_v1_coco.config into the training folder, open it in an editor, and modify the parts marked below.

ssd_mobilenet_v1_coco.config

# SSD with Mobilenet v1 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {                                                   # model
  ssd {                                                    
    num_classes: 1                                        # number of classes; a single class here (MaYun)
    box_coder {                                           # box coder
      faster_rcnn_box_coder {                             
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'ssd_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 2                     # a larger batch_size converges more easily, but demands more GPU memory
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }

  # Note: The below line limits the training process to 200K steps, which we
  # empirically found to be sufficient enough to train the pets dataset. This
  # effectively bypasses the learning rate schedule (the learning rate will
  # never decay). Remove the below line to train indefinitely.
  num_steps: 200000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "data/MY_train.record"  /之前在data中保存过的MY_train.record文件
  }
  label_map_path: "data/MY.pbtxt"    # the label map file we create below
}

eval_config: {
  num_examples: 8000
  # Note: The below line limits the evaluation process to 10 evaluations.
  # Remove the below line to evaluate indefinitely.
  max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "data/MY_test.record"      /之前在data中保存过的MY_test.record文件
  }
  label_map_path: "data/MY.pbtxt"
  shuffle: false
  num_readers: 1
}

Create the label map file MY.pbtxt in the data folder:

MY.pbtxt

item {
  id: 1
  name: 'MaYun'
}
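The id here must match what class_text_to_int returns in generate_tfrecord.py (1 for 'MaYun'), and ids start from 1 (0 is reserved for background). As a quick check (a sketch; get_label_map_dict is part of object_detection.utils):

from object_detection.utils import label_map_util

# maps class names to ids as declared in the pbtxt
label_map_dict = label_map_util.get_label_map_dict('data/MY.pbtxt')
print(label_map_dict)  # expected: {'MaYun': 1}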

6. Training the model

train.py

import functools
import json
import os
import tensorflow as tf

from object_detection.builders import dataset_builder  # input data
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder  # model construction
from object_detection.legacy import trainer  # the training loop
from object_detection.utils import config_util

tf.logging.set_verbosity(tf.logging.INFO)

flags = tf.app.flags
flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
flags.DEFINE_boolean('clone_on_cpu', False,
                     'Force clones to be deployed on CPU.  Note that even if '
                     'set to False (allowing ops to run on gpu), some ops may '
                     'still be run on the CPU if they have no GPU kernel.')
flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
                     'replicas.')
flags.DEFINE_integer('ps_tasks', 0,
                     'Number of parameter server tasks. If None, does not use '
                     'a parameter server.')
flags.DEFINE_string('train_dir', 'D:/DeepLearning/object_detection/training',  # change to your own training directory
                    'Directory to save the checkpoints and training summaries.')

flags.DEFINE_string('pipeline_config_path', 'D:/DeepLearning/object_detection/training/ssd_mobilenet_v1_coco.config',  # change to your own config path
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')

flags.DEFINE_string('train_config_path', '',
                    'Path to a train_pb2.TrainConfig config file.')
flags.DEFINE_string('input_config_path', '',
                    'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
                    'Path to a model_pb2.DetectionModel config file.')

FLAGS = flags.FLAGS


@tf.contrib.framework.deprecated(None, 'Use object_detection/model_main.py.')
def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  if FLAGS.task == 0:
      print('here 1')
      tf.gfile.MakeDirs(FLAGS.train_dir)  # tf.gfile is TensorFlow's file-ops module
  if FLAGS.pipeline_config_path:
    print('here 2')
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    if FLAGS.task == 0:
        print('here 3')
        tf.gfile.Copy(FLAGS.pipeline_config_path,
                    os.path.join(FLAGS.train_dir, 'pipeline.config'),
                    overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        train_config_path=FLAGS.train_config_path,
        train_input_config_path=FLAGS.input_config_path)
    if FLAGS.task == 0:
      for name, config in [('model.config', FLAGS.model_config_path),
                           ('train.config', FLAGS.train_config_path),
                           ('input.config', FLAGS.input_config_path)]:
        tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name),
                      overwrite=True)


  #configs <class 'dict'>
  #model_config <class 'object_detection.protos.model_pb2.DetectionModel'>
  #train_config <class 'object_detection.protos.train_pb2.TrainConfig'>
  #input_config <class 'object_detection.protos.input_reader_pb2.InputReader'>
  model_config = configs['model']
  train_config = configs['train_config']
  input_config = configs['train_input_config']
# functools.partial(func, *args, **keywords) returns a new partial object; when called, it behaves like func called with the positional and keyword arguments given here.
# model_fn is model_builder.build with the model_config and is_training arguments already fixed
  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,  # the detection model's build configuration
      is_training=True)

  def get_next(config):
    return dataset_builder.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  cluster_data = env.get('cluster', None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
  task_data = env.get('task', None) or {'type': 'master', 'index': 0}
  task_info = type('TaskSpec', (object,), task_data)

  # Parameters for a single worker.
  ps_tasks = 0
  worker_replicas = 1
  worker_job_name = 'lonely_worker'
  task = 0
  is_chief = True
  master = ''

  if cluster_data and 'worker' in cluster_data:
    # Number of total worker replicas include "worker"s and the "master".
    worker_replicas = len(cluster_data['worker']) + 1
  if cluster_data and 'ps' in cluster_data:
    ps_tasks = len(cluster_data['ps'])

  if worker_replicas > 1 and ps_tasks < 1:
    raise ValueError('At least 1 ps task is needed for distributed training.')

  if worker_replicas >= 1 and ps_tasks > 0:
    # Set up distributed training.
    server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
                             job_name=task_info.type,
                             task_index=task_info.index)
    if task_info.type == 'ps':
      server.join()
      return

    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
    task = task_info.index
    is_chief = (task_info.type == 'master')
    master = server.target

  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=True)
  # start training
  trainer.train(
      create_input_dict_fn,  # input data pipeline / preprocessing
      model_fn,  # model construction
      train_config,  # training configuration
      master,
      task,
      FLAGS.num_clones,  # number of clones per worker (relevant for distributed deployment)
      worker_replicas,
      FLAGS.clone_on_cpu,  # defaults to False
      ps_tasks,
      worker_job_name,
      is_chief,
      FLAGS.train_dir,
      graph_hook_fn=graph_rewriter_fn)


if __name__ == '__main__':
    tf.app.run()

Fix up the paths and run the script.
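Because train_dir and pipeline_config_path already default to the paths above, a plain python train.py works; the defaults can also be overridden on the command line with the flags the script defines:

python train.py --train_dir=training/ --pipeline_config_path=training/ssd_mobilenet_v1_coco.config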

During training you can also visualize progress with TensorBoard.

Open an Anaconda Prompt and cd to the project folder, i.e. D:\DeepLearning\object_detection.

Run: tensorboard --logdir=training

Note: you must run it from the directory that contains training (I was stuck here for quite a while), and on Windows it is safest to leave quotes off the --logdir path.

Training can be stopped and restarted at any time; checkpoint files accumulate in the training directory.

7. meta_to_pb (exporting the frozen inference graph)

export_inference_graph.py

import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2

slim = tf.contrib.slim
flags = tf.app.flags

flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `encoded_image_string_tensor`, '
                    '`tf_example`]')
flags.DEFINE_string('input_shape', None,
                    'If input_type is `image_tensor`, this can explicitly set '
                    'the shape of this input tensor to a fixed size. The '
                    'dimensions are to be provided as a comma-separated list '
                    'of integers. A value of -1 can be used for unknown '
                    'dimensions. If not specified, for an `image_tensor, the '
                    'default shape will be partially specified as '
                    '`[None, None, None, 3]`.')
flags.DEFINE_string('pipeline_config_path', None,
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file.')
flags.DEFINE_string('trained_checkpoint_prefix', None,
                    'Path to trained checkpoint, typically of the form '
                    'path/to/model.ckpt')
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string('config_override', '',
                    'pipeline_pb2.TrainEvalPipelineConfig '
                    'text proto to override pipeline_config_path.')
flags.DEFINE_boolean('write_inference_graph', False,
                     'If true, writes inference graph to disk.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
FLAGS = flags.FLAGS

def main(_):
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)
  text_format.Merge(FLAGS.config_override, pipeline_config)
  if FLAGS.input_shape:
    input_shape = [
        int(dim) if dim != '-1' else None
        for dim in FLAGS.input_shape.split(',')
    ]
  else:
    input_shape = None
  exporter.export_inference_graph(
      FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
      FLAGS.output_directory, input_shape=input_shape,
      write_inference_graph=FLAGS.write_inference_graph)

if __name__ == '__main__':
  tf.app.run()

Save this py file into the project directory, i.e. under D:\DeepLearning\object_detection.

Open an Anaconda Prompt, cd to D:\DeepLearning\object_detection, and run:

python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix training/model.ckpt-xxxx --output_directory MaYun_detection

(MaYun_detection is a new folder you create in the project directory; replace xxxx with the step number of the checkpoint you want to export.)
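To find the step number for the xxxx above, look in the training folder for the highest-numbered model.ckpt-* files, or ask TensorFlow directly (a minimal sketch):

import tensorflow as tf

# prints something like training/model.ckpt-12345 (step number hypothetical)
print(tf.train.latest_checkpoint('training'))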

That wraps up the entire training process.

8. Testing the model

The places to modify are marked in the script; after editing them it runs as-is.

object_detection_tutorial.py

import numpy as np
import os
import sys
import tensorflow as tf
from distutils.version import StrictVersion
import matplotlib.pyplot as plt
from matplotlib.pyplot import savefig
from PIL import Image
import glob
import time
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# path to the frozen pb file

PATH_TO_FROZEN_GRAPH = 'D:/DeepLearning/object_detection/MaYun_detection/' + 'frozen_inference_graph.pb'  # the pb file exported in step 7

# path to the label map; change these to your own data folder and pbtxt file
PATH_TO_LABELS = os.path.join('D:/DeepLearning/object_detection/data/', 'MY.pbtxt')
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict

for image_path in glob.glob('D:/DeepLearning/object_detection/image_in/*.jpg'):  # folder of images to test
  image = Image.open(image_path)
  # the array based representation of the image will be used later in order to prepare the
  # result image with boxes and labels on it.
  image_np = load_image_into_numpy_array(image)
  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
  image_np_expanded = np.expand_dims(image_np, axis=0)
  # Actual detection.
  start=time.time()
  output_dict = run_inference_for_single_image(image_np, detection_graph)
  end=time.time()
  # print('%.2f seconds' % (end - start))
  # Visualization of the results of a detection.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
  plt.figure(figsize=IMAGE_SIZE)
  plt.imshow(image_np)
  # the next two lines extract the file name from the path
  filepath, tempfilename = os.path.split(image_path)
  shotname, extension = os.path.splitext(tempfilename)
  savefig("D:/DeepLearning/object_detection/result/"+shotname+extension)    存储结果的文件夹

That's the whole process of building your own object detection model. Here are a few result images; since the training set is tiny, the results are not great.

The detection quality is honestly far from ideal, but that's fine: this was my first time building my own model, and I barely understand the underlying theory yet. With this first small success, the next steps are to dig into the algorithms, tune the learning rate and other parameters, and study SSD, YOLO and Faster R-CNN. One step at a time!

Go for it!!!

 
