object_detection API 环境的搭建->模型训练->模型导出
object_detection API 基于 tensorflow 在自己电脑上训练自己的模型和导出模型,进行部署。
下载地址:https://github.com/tensorflow/models
预训练模型下载地址: https://github.com/tensorflow/models/tree/master/research/slim
本项目以 C:\ 为根目录
经查阅多方资料,对相关步骤进行详细说明和记录。
一:环境搭建
基本环境描述:
- —win10
- —python3.6.3
- —tensorflow1.6
安装 Anaconda3-5.0.1-Windows-x86_64
清华镜像 https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/
更新 pip :
python -m pip install --upgrade pip
安装 tensorflow1.6
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow==1.6
将 models 文件复制到 C:\ 下
下载 protoc 解压 把 bin 文件夹下 protoc.exe,复制到 C:\Windows\System32 文件夹下,
cmd 打开命令行界面,输入命令 protoc,验证 protoc 是否可以正常运行
添加环境变量 :
“此电脑”右键-》属性-》高级系统设置-》环境变量-》Path 编辑-》添加
C:\models\research
C:\models\research\slim
proto 文件转为 py 文件
运行 Windows PowerShell 定位到 C:\models\research 运行:
Get-ChildItem object_detection/protos/*.proto | Resolve-Path -Relative | %{ protoc $_ --python_out=. }
安装完成测试
定位到 C:\models\research\ 运行:
python object_detection/builders/model_builder_test.py
可能会报错:No module named ‘object_detection’
可在这个文件中加入:
import sys
sys.path.append('c:\\models')
可能导入路径还要更改为
from research.object_detection......
如果出现类似 “Ran ... tests ... OK” 的测试通过输出:
可以认为环境搭建是成功的。
二:数据准备
下载 labelImg 图像标注工具
本项目中我的 img,xml 路径
img 文件夹用于存放要训练的图片,xml 文件夹用于存放 labelImg 工具对图片标注后生成的 xml 文件
导入图片后,按 w,框选对象,进行标注操作
建议训练图片在100张左右
在 C:\models\tf_train\data 文件夹下创建 label_map.pbtxt,可用记事本打开,记录标记名,及对应 id
在 C:\models\tf_train 下创建 xml_to_csv.py 代码如下
# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2020/7/20 0024 8:29
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path, train_ratio=0.99):
    """Convert labelImg (Pascal VOC style) XML annotations into two CSV files.

    Every ``<object>`` element of every ``*.xml`` file directly inside *path*
    becomes one row with the columns ``filename, width, height, class, xmin,
    ymin, xmax, ymax``. The rows are split in order: the first
    ``int(len(rows) * train_ratio)`` rows are written to ``data/train.csv``
    and all remaining rows to ``data/eval.csv``. Both output paths are
    relative to the current working directory.

    Args:
        path: Directory containing the annotation ``.xml`` files.
        train_ratio: Fraction of the rows that goes into the training CSV.
            Defaults to 0.99, the split this script effectively used before;
            pass e.g. 0.75 for a 3:1 train/eval split.
    """
    xml_list = []
    # Read the annotation files; sorting makes the split reproducible because
    # glob itself gives no ordering guarantee.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in root.findall('object'):
            # Look fields up by tag name instead of child position so that
            # extra or reordered child elements cannot shift the values.
            box = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    # Split into training and evaluation rows. Both slices share the same
    # boundary so no row is dropped between them.
    split_at = int(len(xml_list) * train_ratio)
    train_list = xml_list[:split_at]
    eval_list = xml_list[split_at:]
    # Save as CSV.
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv('data/train.csv', index=False)
    eval_df.to_csv('data/eval.csv', index=False)
def main():
    """Convert this project's labelImg XML annotations into CSV files."""
    # Directory holding the XML files produced by labelImg.
    path = r'C:\models\tf_train\data\xml'
    xml_to_csv(path)
    print('成功把 xml 转为 csv.')


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
定位到 C:\models\tf_train 下,运行该文件
python xml_to_csv.py
将完成 xml 到 csv 的转换,在 C:\models\tf_train\data 下会出现 train.csv ,eval.csv 文件
在 C:\models\tf_train 下创建 to_tfrecord.py 用于生成 .record 文件
# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2020/7/24 8:50
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
import sys
sys.path.append('c:\\models')
# from object_detection.utils import dataset_util
from research.object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flag declarations for this script.
# NOTE(review): FLAGS.csv_input and FLAGS.output_path are not read anywhere in
# the visible script; the __main__ block passes hard-coded paths instead.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
def class_text_to_int(row_label):
    """Translate a class name into its integer id.

    Args:
        row_label: Class name taken from the ``class`` column of the CSV.

    Returns:
        The integer id of the class, or None when the label is not one of
        the known names (a ``NONE: <label>`` notice is printed in that case).
    """
    label_ids = {
        'bai_lan': 1,
        'lan_lan': 2,
        'hong_lan': 3,
        'jin_lan': 4,
        'hui_lan': 5,
    }
    class_id = label_ids.get(row_label)
    if class_id is None:
        # format() instead of '+' so a non-string label (e.g. the NaN pandas
        # produces for an empty cell) is reported instead of raising TypeError.
        print('NONE: {}'.format(row_label))
    return class_id
def split(df, group):
    """Group the rows of *df* by *group* and return one record per group.

    Args:
        df: DataFrame to partition.
        group: Column name (or other ``groupby`` key) to group on.

    Returns:
        A list of ``data(filename, object)`` namedtuples, where ``filename``
        is the group key and ``object`` is the sub-DataFrame of that group.
    """
    Record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(Record(key, grouped.get_group(key)))
    return records
def create_tf_example(group, path):
    """Build one ``tf.train.Example`` for an image and all of its boxes.

    Args:
        group: Record with a ``filename`` attribute (image file name) and an
            ``object`` attribute (DataFrame with the columns ``xmin``,
            ``xmax``, ``ymin``, ``ymax`` and ``class`` for that image).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, the image
        size, the box coordinates divided by the image width/height, and the
        class names and ids.
    """
    image_name = '{}'.format(group.filename)
    image_path = os.path.join(path, image_name)
    print(image_path)
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the real pixel size of the image.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    # Store exactly the name the file was opened with. Appending '.jpg' here
    # would record a name that differs from the file on disk, and would fail
    # outright for a non-string filename.
    filename = image_name.encode('utf8')
    # NOTE(review): the format is always declared as jpg, whatever the actual
    # file type is - confirm all training images are JPEG.
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for _, row in group.object.iterrows():
        # Box coordinates are stored relative to the image size.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def main(csv_input, output_path, imgPath):
    """Write one TFRecord file from an annotation CSV.

    Args:
        csv_input: Path of the CSV file with one row per annotated box.
        output_path: Path of the TFRecord file to create.
        imgPath: Directory containing the images named in the CSV.
    """
    # Read and group the annotations before opening the writer, so that an
    # unreadable CSV does not leave an empty record file behind.
    examples = pd.read_csv(csv_input)
    grouped = split(examples, 'filename')
    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        for group in grouped:
            tf_example = create_tf_example(group, imgPath)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when building an example fails.
        writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
    imgPath = r'C:\models\tf_train\data\img'
    # (csv_input, output_path) pairs: the training set first, then the
    # evaluation set.
    jobs = (
        ('data/train.csv', 'data/train.record'),
        ('data/eval.csv', 'data/eval.record'),
    )
    for csv_input, output_path in jobs:
        main(csv_input, output_path, imgPath)
    print('任务完成')
在 class_text_to_int 函数中按刚刚创建的 C:\models\tf_train\data\label_map.pbtxt 文件进行对应修改,id 和 name 要对应
定位到 C:\models\tf_train,运行
python to_tfrecord.py
下载 需要fine-tune的模型:解压到 C:\models\tf_train\fine_tune_model 目录下,
三:管道文件配置
把 research\object_detection\samples\configs\ssd_mobilenet_v2_coco.config ( 下载什么模型就找对应的文件 ) 复制到 C:\models\tf_train\data 修改相关参数
检测目标的个数
record 文件,label_map.pbtxt 标注文件的路径
定位到 C:\models\research\object_detection 尝试运行
python train.py --logtostderr --train_dir=C:/models/tf_train/training --pipeline_config_path=C:/models/tf_train/data/ssd_inception_v2_pets.config
如果报错,对 train.py 做相应修改
import sys
sys.path.append('c:\\models\\research')
再尝试运行,找到报错文件,添加如上代码
直到成功开始训练
训练产生的模型文件
训练时中途可停止,下次开始时不会从头开始训练,如这里训练到1019步停止,下次训练时将从这里开始接着训练
四:模型导出为 .pb
在 C:\models\tf_train 下创建 export_inference_graph.py
# -*- coding: utf-8 -*-
# 斌彬电脑
# @Time : 2020/7/24 14:50
import sys
sys.path.append('c:\\models')
import tensorflow as tf
from google.protobuf import text_format
from research.object_detection import exporter
from research.object_detection.protos import pipeline_pb2
# NOTE(review): slim is not referenced anywhere in the visible script.
slim = tf.contrib.slim
# Command-line flag declarations; main() reads them through FLAGS.
flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
'one of [`image_tensor`, `encoded_image_string_tensor`, '
'`tf_example`]')
flags.DEFINE_string('input_shape', None,
'If input_type is `image_tensor`, this can explicitly set '
'the shape of this input tensor to a fixed size. The '
'dimensions are to be provided as a comma-separated list '
'of integers. A value of -1 can be used for unknown '
'dimensions. If not specified, for an `image_tensor, the '
'default shape will be partially specified as '
'`[None, None, None, 3]`.')
flags.DEFINE_string('pipeline_config_path', r'C:\models\tf_train\data\ssd_inception_v2_pets.config',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('trained_checkpoint_prefix', r'C:\models\tf_train\training\model.ckpt-1019',
'Path to trained checkpoint, typically of the form '
'path/to/model.ckpt')
# NOTE(review): despite the ".pb" suffix, this value is handed to
# exporter.export_inference_graph as its output directory argument - confirm
# the default is really meant to end in a file-like name.
flags.DEFINE_string('output_directory', r'C:\models\tf_train\data\frozen_inference_graph.pb\frozen_inference_graph.pb', 'Path to write outputs.')
flags.DEFINE_string('config_override', '',
'pipeline_pb2.TrainEvalPipelineConfig '
'text proto to override pipeline_config_path.')
# The three flags marked as required below all have non-empty defaults above.
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
FLAGS = flags.FLAGS
def main(_):
    """Load the pipeline config, apply overrides and export the graph.

    Reads the text-format pipeline config named by ``--pipeline_config_path``,
    merges ``--config_override`` on top of it, parses the optional
    ``--input_shape`` list and passes everything to
    ``exporter.export_inference_graph``.
    """
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as config_file:
        config_text = config_file.read()
    text_format.Merge(config_text, config)
    text_format.Merge(FLAGS.config_override, config)

    # '-1' entries stand for unknown dimensions and become None.
    input_shape = None
    if FLAGS.input_shape:
        input_shape = []
        for dim in FLAGS.input_shape.split(','):
            input_shape.append(None if dim == '-1' else int(dim))

    exporter.export_inference_graph(
        FLAGS.input_type,
        config,
        FLAGS.trained_checkpoint_prefix,
        FLAGS.output_directory,
        input_shape)


if __name__ == '__main__':
    tf.app.run()
其中 pipeline_config_path 管道文件的路径
trained_checkpoint_prefix 训练好的模型文件路径,这里我选择最大步数的 model.ckpt-1019
output_directory 导出 pb 文件存放 的路径
定位到 C:\models\tf_train 运行:
python export_inference_graph.py
五: .pb 模型的测试
在 C:\models\tf_train 下创建文件夹 test_images ,随便拍几张照片放到该文件夹下
这是一段用训练好的模型做的商品识别,
1596068932705
在尝试更换预训练模型 faster_rcnn_inception_v2_coco_2018_01_28 会报错
Argument must be a dense tensor: range(0, 3) - got shape [3], but wanted []
因为代码兼容问题要将 range(num_boundaries) 转为 list
解决方法
修改 C:\models\research\object_detection\utils 下 learning_schedules.py
rate_index = tf.reduce_max(tf.where(tf.greater_equal(global_step, boundaries),
list(range(num_boundaries)),
[0] * num_boundaries))
替换原有代码
用这个预训练模型,要高端显卡,如 1060ti 以上,