利用TensorFlow object detection API训练目标检测的模型
----车牌检测
安装并配置好TensorFlow object detection API的环境,
参考链接 https://blog.csdn.net/weixin_43483316/article/details/104720642
车牌识别相关数据,建议自己制作数据集
https://pan.baidu.com/s/1A33nrnm5tZrYNZLPNNfZCA
提取码:u1ck
- 制作数据集
工具:labelImg
下载地址: https://pan.baidu.com/s/1kcsSyT3Vnlw5ItSUqrZ_dg 提取码: 2557
(我们要做的工作就是在图片上标注出我们要检测的部位,标注少量的图片数据,然后丢给电脑去训练。告诉电脑,相当于告诉电脑,告诉它这个地方是车牌,你自己去找车牌的特征,等下我拿其他图片给你,你要告诉我车牌的部位在哪)
开始给图片做标注,打开labelImg,点击open dir, 添加图片所在的路径。会有如下显示。详细步骤如图所示
点击edit->create rectbox就可以进行标记了。标记完后点击save即可, 点击next page进行下一张标记
标注得到内容如下
<?xml version="1.0"?>
-<annotation>
<folder>tesi_images</folder>
<filename>image101.jpg</filename>
<path>C:\Users\lbh\Desktop\人工智能备赛\tensorfowAPI\dataset\tesi_images\image101.jpg</path>
-<source>
<database>Unknown</database>
</source>
-<size>
<width>1600</width>
<height>1200</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
-<object>
<name>plate</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
-<bndbox>
<xmin>790</xmin>
<ymin>716</ymin>
<xmax>971</xmax>
<ymax>761</ymax>
</bndbox>
</object>
</annotation>
之后再将标签文件和图片分成两个文件夹。注意:图片命名不能带中文,标注后不要修改图片和标注的名称。图片放在images,标注文件放在xmls
-
转换数据集
(1) tensorflow object detection API要求训练的数据集是tfrecord格式的数据,所以将.xml文件转化为tfrecord格式的文件。在models-master路径下(与research同级)新建一个文件夹,datasets。如果你前面的环境没问题,你放在哪个路径都是可以的
(2) 创建xml转csv数据集的处理文件 xml_to_csv.py
代码内容如下:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
    """Collect every labelled object from the Pascal-VOC .xml files in
    *path* and write them out as train.csv / eval.csv (~2:1 split).

    Each CSV row is: filename, width, height, class, xmin, ymin, xmax, ymax.
    Both CSV files are written into the current working directory.
    """
    xml_list = []
    # Read every annotation file produced by labelImg.
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            # Layout (see the sample annotation in this document):
            # size -> [width, height, depth]
            # object -> [name, pose, truncated, difficult, bndbox]
            # bndbox -> [xmin, ymin, xmax, ymax]
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    # Split all samples into train/eval at roughly 2:1.
    # BUG FIX: the original sliced eval as xml_list[cut + 1:], silently
    # dropping the sample at index `cut`; use xml_list[cut:] instead.
    cut = int(len(xml_list) * 0.67)
    train_list = xml_list[:cut]
    eval_list = xml_list[cut:]
    # Save as CSV.
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv('train.csv', index=None)
    eval_df.to_csv('eval.csv', index=None)
def main():
    """Entry point: convert all labelImg .xml annotations to CSV."""
    # BUG FIX: the original path string had leading/trailing spaces
    # (' ./datasets/data/xmls '), which is not a valid directory name.
    path = './datasets/data/xmls'  # directory containing the .xml files
    xml_to_csv(path)
    print('Successfully converted xml to csv.')


if __name__ == '__main__':
    main()
运行代码,就可以进行转换:生成两个csv文件
(3) 将生成的csv文件转换为tfrecord格式,在dataset下新建generate_tfrecord.py
代码如下:
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
# from object_detection.utils import dataset_util
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flags (TF 1.x style).
# NOTE(review): main() in this script is called with explicit arguments from
# the __main__ block, so these flags are defined but never read — presumably
# left over from the upstream template; confirm before removing.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# Map class names to numeric label ids.
def class_text_to_int(row_label):
    """Return the numeric id for a class name from the CSV.

    Ids must match data/label_map.pbtxt (id 1 == 'plate').

    BUG FIX: the original printed 'NONE: <label>' and implicitly returned
    None for unknown labels, which only crashed later inside
    dataset_util.int64_list_feature with an opaque TypeError; raise a clear
    ValueError at the point of failure instead.
    """
    if row_label == 'plate':
        return 1
    raise ValueError('Unknown class label: ' + row_label)
def split(df, group):
    """Group a DataFrame by *group* (the filename column) and return one
    namedtuple per image carrying the filename and its object rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # The original zipped gb.groups.keys() with gb.groups — iterating a dict
    # yields its keys, so that was zip(keys, keys); iterate the keys once.
    return [data(filename, gb.get_group(filename)) for filename in gb.groups]
def create_tf_example(group, path):
    """Build one tf.train.Example for a single image and all of its boxes.

    group: namedtuple from split() — .filename plus the image's rows as a
           DataFrame in .object.
    path:  directory containing the image files.
    """
    print(os.path.join(path, '{}'.format(group.filename)))
    # Read the raw JPEG bytes; they are stored in the record as-is.
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    # BUG FIX: filenames in the CSV already include their extension (the xml
    # <filename> tag holds e.g. 'image101.jpg'), so the original
    # (group.filename + '.jpg') stored 'image101.jpg.jpg' in the record.
    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    # Box coordinates are normalized to [0, 1], as the API expects.
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def main(csv_input, output_path, imgPath):
    """Convert one annotation CSV into a TFRecord file.

    csv_input:   CSV produced by xml_to_csv.py (train.csv or eval.csv)
    output_path: destination .record file
    imgPath:     directory containing the image files
    """
    writer = tf.python_io.TFRecordWriter(output_path)
    annotations = pd.read_csv(csv_input)
    # One Example per image (split() groups the rows by filename).
    for grp in split(annotations, 'filename'):
        writer.write(create_tf_example(grp, imgPath).SerializeToString())
    writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
    # Directory that holds the dataset images.
    imgPath = '/picture_new'
    # Build train.record from train.csv, then eval.record from eval.csv.
    main('data/train.csv', 'data/train.record', imgPath)
    main('data/eval.csv', 'data/eval.record', imgPath)
得到两个record格式的文件(train.record、eval.record)
-
下载fine-tune模型
模型地址:https://github.com/tensorflow/models/tree/master/research/slim
根据需要下载具体的模型,在dataset文件夹下新建fine_tune_model文件夹,将下载的模型解压后,复制三个.ckpt文件放到fine_tune_model文件夹下model.ckpt.data-00000-of-00001, model.ckpt.index model.ckpt.meta
-
配置文件
(1) 在dataset下新建data文件夹,在data文件夹下创建label_map.pbtxt文件:
内容如下(如果有多个标签,修改id和对应的name):item { id: 1 name: 'plate' } item { id: 2 name: 'xxxx' }
(2) 找到目录research/object_detection/samples/configs/ssd_inception_v2_pets.config文件,将此文件复制到dataset/data文件夹下
并对以下的地方做出修改:
第一处,修改为自己检测的目标的类别数,在ssd下面:
num_classes: 1
第二处,修改数据集数量,在eval_config下面:
num_examples: 5823
第三处,修改模型路径:
fine_tune_checkpoint: "./dataset/fine_tune_model/model.ckpt"
第四处,修改train_input_reader
将input_path参数改为前面生成train.record文件的路径
将label_map_path参数改为前面label_map.pbtxt文件的路径
input_path: "./dataset/data/train.record"
label_map_path: "./dataset/data/label_map.pbtxt"
第五处,修改eval_input_reader:
将 input_path参数改为前面生成eval.record文件的路径
将label_map_path参数改为前面label_map.pbtxt文件的路径
input_path: "./dataset/data/eval.record"
label_map_path: "./dataset/data/label_map.pbtxt"
-
修改训练代码train.py
(1) 在datasets下面新建一个保存模型的文件夹train_dir
旧版models-master对应的路径:/research/object_detection
新版models-master对应的路径:/research/object_detection/legacy
建议复制到datasets目录下训练flags.DEFINE_string('train_dir', r'/datasets/train_dir', # 指定模型的输出路径 'Directory to save the checkpoints and training summaries.')
flags.DEFINE_string('pipeline_config_path', '/datasets/data/ pipeline_config ', #配置文件路径 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' 'file. If provided, other configs are ignored')
(2) 训练模型
python train.py
训练结果保存在datasets/train_dir
-
模型格式转换
有部分代码需要用pb格式的代码,所以需要将模型格式进行转换
(1) 复制/research/object_detection目录下的export_inference_graph.py文件,复制到dataset(2) 代码修改三处路径
vim export_inference_graph.py第一处,修改pipeline_config路径: flags.DEFINE_string('pipeline_config_path', 'data/pipeline.config', 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' 'file.')
第二处,修改前面生成的模型路径: flags.DEFINE_string('trained_checkpoint_prefix', 'train_dir/model.ckpt-1500', 'Path to trained checkpoint, typically of the form ' 'path/to/model.ckpt')
第三处,修改生成模型pb格式的路径: flags.DEFINE_string('output_directory', 'train_dir/pb/frozen_inference_graph.pb', 'Path to write outputs.')
(3) 执行转换代码: python export_inference_graph.py 得到pb格式的模型
-
预测模型
在datasets文件夹下新建一个python文件,命名为prediction.py,拷贝下面的代码
# Import required packages and set up the environment.
import numpy as np
import tensorflow as tf
import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Frozen model path. NOTE(review): export_inference_graph.py writes
# frozen_inference_graph.pb *inside* output_directory, which is why the
# name appears twice here — confirm against your export settings.
PATH_TO_FROZEN_GRAPH = 'train_dir/pb/frozen_inference_graph.pb/frozen_inference_graph.pb'
# Label map path.
PATH_TO_LABELS = 'data/label_map.pbtxt'

# Build a fresh graph and load the frozen model into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # tf.gfile.GFile(filename, mode) returns a file handle, much like
    # Python's built-in open().
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        # Read the *.pb file and restore it into the graph definition.
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # Import into the current default graph (loads the model).
        tf.import_graph_def(od_graph_def, name='')

# Load the dataset label map.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)
# print(category_index)


def load_image_into_numpy_array(image):
    """Convert a PIL image to an (im_height, im_width, 3) uint8 numpy array."""
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# Folder with the images to run prediction on.
# BUG FIX: the original path began with a stray space
# (' train_data/picture_new/'), which os.listdir cannot resolve.
TEST_IMAGE_PATHS = []
Image_path = 'train_data/picture_new/'
for i in os.listdir(Image_path):
    print(Image_path + i)
    TEST_IMAGE_PATHS.append(Image_path + i)

# Output figure size (inches).
IMAGE_SIZE = (12, 8)

with tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        # Convert the image to a numpy array.
        image_np = load_image_into_numpy_array(image)
        # Expand to [1, ?, ?, 3]: batch size, height, width, channels.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Fetch the tensors we need from the graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # All detection boxes.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Confidence of each detection.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        # Class of each box.
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        # Number of detections.
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        # Run detection.
        boxes, scores, classes, num_detections = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        # Visualize the result (np.squeeze drops the size-1 batch dimension).
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()
#修改模型路径、配置文件路径、预测图片路径
车牌检测预测效果
同理,训练行人的数据集得到的模型预测
训练人脸口罩的数据集得到的模型预测
-
补充环境信息:
python版本: python3.5 Package Version -------------------- ------------------------- absl-py 0.9.0 astor 0.8.1 certifi 2018.8.24 cycler 0.10.0 Cython 0.29.15 gast 0.3.3 google-pasta 0.1.8 grpcio 1.27.2 h5py 2.10.0 Keras-Applications 1.0.8 Keras-Preprocessing 1.1.0 kiwisolver 1.1.0 lxml 4.5.0 Markdown 3.2.1 matplotlib 2.2.5 numpy 1.18.1 pandas 0.20.0 Pillow 7.0.0 pip 20.0.2 protobuf 3.11.3 pyparsing 2.4.6 python-dateutil 2.8.1 pytz 2019.3 setuptools 40.4.3 six 1.14.0 slim 0.1 tensorboard 1.14.0 tensorflow-estimator 1.14.0 tensorflow-gpu 1.14.0 termcolor 1.1.0 Werkzeug 1.0.0 wheel 0.34.2 wrapt 1.12.0