数据格式转换
在之前的一篇博客目标检测———LabelImg数据标注中已经介绍过了数据标注。
将标注后得到的.xml格式文件转为.csv格式文件,运行xml_to_csv.py文件后产生eval.csv和train.csv两个文件。
xml_to_csv.py
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
    """Convert LabelImg XML annotations into ``train.csv`` and ``eval.csv``.

    Every ``<object>`` element in the ``*.xml`` files directly under *path*
    becomes one row ``(filename, width, height, class, xmin, ymin, xmax,
    ymax)``.  The first 67% of the rows go to ``data/train.csv`` and all
    remaining rows go to ``data/eval.csv``.

    Args:
        path: Directory that contains the XML annotation files.
    """
    xml_list = []
    # Read the annotation files.
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            # Children are read by position; the column names below pair
            # size[0]/size[1] with width/height, member[0] with the class
            # name and member[4][0..3] with xmin, ymin, xmax, ymax.
            size = root.find('size')
            box = member[4]
            xml_list.append((
                root.find('filename').text,
                int(size[0].text),
                int(size[1].text),
                member[0].text,
                int(box[0].text),
                int(box[1].text),
                int(box[2].text),
                int(box[3].text),
            ))

    column_name = ['filename', 'width', 'height', 'class',
                   'xmin', 'ymin', 'xmax', 'ymax']

    # Split into training and evaluation rows.  The evaluation slice starts
    # exactly where the training slice ends so that no row is dropped.
    split_at = int(len(xml_list) * 0.67)
    train_list = xml_list[:split_at]
    eval_list = xml_list[split_at:]

    # Save as CSV; make sure the output directory exists first.
    os.makedirs('data', exist_ok=True)
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv('data/train.csv', index=False)
    eval_df.to_csv('data/eval.csv', index=False)
def main():
    """Convert the XML annotations under ``./Data`` and report success."""
    # Change this to the folder that holds your own XML annotation files.
    xml_dir = r'./Data'
    xml_to_csv(xml_dir)
    print('Successfully converted xml to csv.')


main()
产生的.csv文件内容、格式如下图:
再将.csv格式文件转成TensorFlow能接收的.record格式,运行generate_tfrcord.py文件后产生eval.record和train.record两个文件。
generate_tfrcord.py
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
# from object_detection.utils import dataset_util
from research.object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flags for the CSV input path and the TFRecord output path.
# NOTE(review): FLAGS is not read anywhere in the code shown here; the paths
# used at run time are the ones hard-coded in the __main__ section.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# Map a class name to its numeric id (edit to match your own classes).
def class_text_to_int(row_label):
    """Return the integer id for *row_label*, or ``None`` if it is unknown.

    Only ``'mask'`` (id 1) is recognised.  Any other label is printed with a
    ``NONE: `` prefix and ``None`` is returned.  To support more classes, add
    one branch per label (for example ``'car'`` -> 2) and keep the ids in
    step with ``label_map.pbtxt``.
    """
    if row_label != 'mask':
        print('NONE: ' + row_label)
        return None
    return 1
def split(df, group):
    """Group the rows of *df* by column *group*.

    Args:
        df: DataFrame of annotation rows.
        group: Name of the column to group by.

    Returns:
        A list of ``data(filename, object)`` named tuples, one per distinct
        value of the column: ``filename`` is that value and ``object`` is the
        sub-frame holding its rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(data(key, grouped.get_group(key)))
    return records
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Item with a ``filename`` attribute (the image file name) and
            an ``object`` attribute (the annotation rows of that image), as
            produced by ``split``.
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, the image
        size, and per-box normalised coordinates, class names and class ids.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    print(image_path)
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # Decode only to obtain the pixel size used to normalise the boxes.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    # NOTE(review): '.jpg' is appended here although the file above is opened
    # using group.filename as-is - confirm the stored name is the intended one.
    filename = (group.filename + '.jpg').encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, box in group.object.iterrows():
        # Coordinates are stored as fractions of the image width / height.
        xmins.append(box['xmin'] / width)
        xmaxs.append(box['xmax'] / width)
        ymins.append(box['ymin'] / height)
        ymaxs.append(box['ymax'] / height)
        classes_text.append(box['class'].encode('utf8'))
        classes.append(class_text_to_int(box['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(csv_input, output_path, imgPath):
    """Write one TFRecord file from the annotations listed in a CSV file.

    Args:
        csv_input: Path of the CSV file with one row per bounding box.
        output_path: Path of the TFRecord file to create.
        imgPath: Directory that contains the images named in the CSV.
    """
    # Read and group the annotations before opening the writer, so that a
    # missing or malformed CSV does not leave an empty record file behind.
    examples = pd.read_csv(csv_input)
    grouped = split(examples, 'filename')

    # The context manager closes the writer even when building or writing an
    # example raises part-way through.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, imgPath)
            writer.write(tf_example.SerializeToString())

    print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
    # Folder that contains the source images (change to your own path).
    imgPath = r'/home/well/PycharmProjects/Mask_Detection/mask'
    # Build train.record first, then the evaluation file eval.record
    # (change both pairs to your own paths).
    for csv_input, output_path in (
            ('data/train.csv', 'data/train.record'),
            ('data/eval.csv', 'data/eval.record'),
    ):
        main(csv_input, output_path, imgPath)
环境配置
下载TensorFlow Models:
git clone https://github.com/tensorflow/models.git
环境:
anaconda3
python3.6
tensorflow-gpu=1.15(试过1.X和1.13的版本都不行,多次踩坑TAT)
其他详细依赖、环境测试看这里,COCO API可以先不安装。
环境安装配置好后,在/models/research目录下执行下面命令测试:
python object_detection/builders/model_builder_test.py
显示结果如下则表示成功:
模型训练和测试
为方便管理,在models目录下新建一个dataset文件夹,然后在dataset中新建data和fine_tune_model两个文件夹。其中data文件夹用来放训练文件,fine_tune_model文件夹用来放预训练模型。将eval.record和train.record两个文件放入data文件夹中,然后在data文件夹中新建label_map.pbtxt文件。
label_map.pbtxt文件内容如下,根据自己的类别修改。其中id和name要跟上面generate_tfrcord.py中的相对应。
item {
id: 1
name: 'xxx'
}
item {
id: 2
name: 'xxx'
}
item {
id: 3
name: 'xxx'
}
下载预训练模型,本文以ssd_mobilenet_v1_coco_2018_01_28为例,下载后解压到fine_tune_model文件夹中。
将research/object_detection/ssd_mobilenet_v1_coco.config复制到data文件夹中。
修改ssd_mobilenet_v1_coco.config文件:
第9行 num_classes:1 (按自己的类别数修改)
第156行 fine_tune_checkpoint:./dataset/fine_tune_model/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt(预训练模型地址)
第175行input_path:./dataset/data/train.record(train.record地址)
第177行label_map_path:./dataset/data/label_map.pbtxt(label_map.pbtxt地址,下同)
第189行input_path:./dataset/data/eval.record(eval.record地址)
第191行label_map_path:./dataset/data/label_map.pbtxt
开始训练(在object_detection目录下,可改成绝对路径):
python model_main.py --model_dir=/models/dataset/data --pipeline_config_path=/models/dataset/data/ssd_mobilenet_v1_coco.config --num_train_steps=10000 --num_eval_steps=20 --alsologtostderr
训练结束后可以看到生成了这些文件。
训练完成之后把训练文件转为pb文件,在data文件夹中新建savemodel文件夹来存放转换后的文件:
python export_inference_graph.py --input_type=image_tensor --pipeline_config_path=./models/dataset/data/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix=./models/dataset/data/model.ckpt-10000 --output_directory=./models/dataset/data/savemodel
frozen_inference_graph.pb是我们要用到的模型。
测试模型:
在dataset文件夹中新建test_image文件夹用来放测试图片。
执行object_detection_tutorial.py
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# # This is needed to display the images.
# %matplotlib inline
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# from utils import label_map_util
# from utils import visualization_utils as vis_util
from research.object_detection.utils import label_map_util
from research.object_detection.utils import visualization_utils as vis_util
# Path to the frozen detection graph (.pb file produced by the export step).
# This is the actual model that is used for the object detection.
# NOTE(review): the export command shown earlier in this article writes to
# ./models/dataset/data/savemodel (no "ssdv1" folder) - confirm this path.
PATH_TO_CKPT = r'./models/dataset/data/ssdv1/savemodel/frozen_inference_graph.pb'
# Path to the label map (.pbtxt) used to add the correct label to each box.
PATH_TO_LABELS = r'./models/dataset/data/label_map.pbtxt'
# Number of classes; change this to match your own label map.
NUM_CLASSES = 1
# Load the frozen TensorFlow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as graph_file:
        od_graph_def.ParseFromString(graph_file.read())
        tf.import_graph_def(od_graph_def, name='')

# Load the label map and build the category index that is later passed to
# the visualisation helper.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)
# Helper code
def load_image_into_numpy_array(image):
    """Return *image* as a ``(height, width, 3)`` ``uint8`` NumPy array.

    The image is converted to RGB first.  Without the conversion, grayscale,
    palette or RGBA images do not provide three values per pixel and the
    reshape below raises ``ValueError``.

    Args:
        image: A PIL image (any object offering ``convert``, ``size`` and
            ``getdata`` in the PIL manner).

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` and dtype ``uint8``.
    """
    rgb_image = image.convert('RGB')
    (im_width, im_height) = rgb_image.size
    return np.array(rgb_image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# Test images are read from PATH_TO_TEST_IMAGES_DIR and are named
# mask_<index>.jpg.  To run on your own images, change the directory, the
# name pattern, or the index range below (range(17, 25) selects
# mask_17.jpg through mask_24.jpg).
PATH_TO_TEST_IMAGES_DIR = r'./models/dataset/test_image'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'mask_{}.jpg'.format(index))
    for index in range(17, 25)
]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Look up the input and output tensors once; they are the same for
        # every image, so there is no need to fetch them inside the loop.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score is the level of confidence for the matching box.  The
        # score is shown on the result image together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
            # The array representation of the image is used later to draw
            # the boxes and labels onto the result image.
            image_np = load_image_into_numpy_array(image)
            # The image is fed with an extra leading axis, i.e. as a batch
            # containing this single image.
            image_np_expanded = np.expand_dims(image_np, axis=0)

            # Actual detection.  The results get their own names so that the
            # tensor handles above stay valid for the next image.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            plt.figure(figsize=IMAGE_SIZE)
            plt.imshow(image_np)
            plt.show()
测试结果:
报错总结
- 报错:object_detection_tutorial.py:110: UserWarning: Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure. plt.show()
  解决方法:将/models/research/object_detection/utils/visualization_utils.py文件中第29行的Agg改成TkAgg。
- 报错:AttributeError: module 'tensorflow._api.v1.compat' has no attribute 'v2'…
  解决方法:所使用的TensorFlow版本不兼容,本人改用tensorflow-gpu=1.15后问题解决。
至此,整个TensorFlow目标检测项目就完成啦。其他类似问题大都是TensorFlow版本问题,有任何问题欢迎留言。下一篇博客将介绍把TensorFlow模型迁移到安卓手机使用的方法。