Tensorflow2 voc2012-tfrecord文件读写


TFRecord是TensorFlow内置的一种二进制文件格式。有时候我们会利用同一数据集训练不同的网络框架,而对于较大的数据集,不断地复制、移动和读取较为不便;TFRecord文件可将二进制数据和标签存储到同一文件中,可以更好地利用内存,也方便复制和移动。

1 读取VOC-xml文件

xml文件的数据存储格式类似于数据结构中的树结构,annotation为根节点。在voc2012中主要获取filename、width、height,以及object(图像中的目标)下的name(目标名称)和bndbox(框的位置)。

<annotation>
	<folder>VOC2012</folder>
	<filename>2007_000123.jpg</filename>
	<source>
		<database>The VOC2007 Database</database>
		<annotation>PASCAL VOC2007</annotation>
		<image>flickr</image>
	</source>
	<size>
		<width>500</width>
		<height>375</height>
		<depth>3</depth>
	</size>
	<segmented>1</segmented>
	<object>
		<name>train</name>
		<pose>Unspecified</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>1</xmin>
			<ymin>26</ymin>
			<xmax>358</xmax>
			<ymax>340</ymax>
		</bndbox>
	</object>
</annotation>
import xml.dom.minidom as xdom

# Read the class-name file (one name per line) and map each stripped name to
# a 1-based integer label. The `with` block closes the file handle as soon as
# the lines are read instead of leaving it open until garbage collection.
with open('model_data/voc_classes.txt', 'r') as class_file:
    classes_names = class_file.readlines()
voc_classes = {name.strip(): index for index, name in enumerate(classes_names, start=1)}
print(voc_classes)

def Prase_Singel_xml(xml_path):
    """Parse one VOC-style annotation XML file.

    Args:
        xml_path: Path of the annotation XML file to read.

    Returns:
        A tuple ``(image_name, image_width, image_height, boxes)``.
        ``boxes`` holds one ``[xmin, ymin, xmax, ymax, label]`` list per
        ``<object>`` element, where ``label`` is the integer looked up in the
        module-level ``voc_classes`` mapping.

    Raises:
        KeyError: If an object's name is not a key of ``voc_classes``.
        IndexError: If a required element is missing or empty.
        ValueError: If a size or coordinate value is not numeric.
    """
    def first_text(node, tag):
        # Text content of the first descendant element called `tag`.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    def first_int(node, tag):
        # Going through float() accepts decimal strings such as "45.7",
        # which a bare int() rejects with ValueError; the value is truncated.
        return int(float(first_text(node, tag)))

    root = xdom.parse(xml_path).documentElement

    # Image file name and dimensions.
    image_name = first_text(root, "filename")
    size = root.getElementsByTagName("size")[0]
    image_height = first_int(size, "height")
    image_width = first_int(size, "width")

    # One entry per annotated object: box corners followed by the class label.
    bndbox_lable_dic = []
    for one_obj in root.getElementsByTagName("object"):
        obj_label = voc_classes[first_text(one_obj, "name")]
        bndbox = one_obj.getElementsByTagName("bndbox")[0]
        corners = [first_int(bndbox, tag) for tag in ("xmin", "ymin", "xmax", "ymax")]
        bndbox_lable_dic.append(corners + [obj_label])

    return image_name, image_width, image_height, bndbox_lable_dic

if __name__=='__main__':
    # Quick manual check: parse one sample annotation and print the result.
    sample_xml = 'VOCdevkit/VOC2012/Annotations/2007_000027.xml'
    parsed = Prase_Singel_xml(sample_xml)
    print(parsed)

2 写入tfrecords文件

import tensorflow as tf
import glob
import os
from read_xml import Prase_Singel_xml

def write_to_tfrecord(all_xml_path, tfrecord_path, voc_img_path):
    """Serialize images and their box annotations into one TFRecord file.

    Args:
        all_xml_path: Iterable of annotation XML paths, each parsed with
            ``Prase_Singel_xml``.
        tfrecord_path: Path of the TFRecord file to create.
        voc_img_path: Directory containing the image files named in the
            annotations.

    Each record stores the raw image file bytes under ``image``, the image
    ``width`` and ``height`` as floats, the box corners ``xmin``/``ymin``/
    ``xmax``/``ymax`` as float lists, and the class ids under ``label`` as
    an int64 list.
    """
    # The context manager closes the writer on exit (also on error), so the
    # records written so far are flushed to disk instead of being left in a
    # writer that is never closed.
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        for i, single_xml_path in enumerate(all_xml_path):
            image_name, image_width, image_height, bndbox_lable_dic = Prase_Singel_xml(single_xml_path)

            # Store the encoded file bytes as-is; close the handle right away.
            with open(os.path.join(voc_img_path, image_name), 'rb') as image_file:
                image_data = image_file.read()

            # Split the per-object [xmin, ymin, xmax, ymax, label] rows into
            # one list per column.
            xmin = [box[0] for box in bndbox_lable_dic]
            ymin = [box[1] for box in bndbox_lable_dic]
            xmax = [box[2] for box in bndbox_lable_dic]
            ymax = [box[3] for box in bndbox_lable_dic]
            obj_label = [box[4] for box in bndbox_lable_dic]

            feature = {
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_data])),
                'width': tf.train.Feature(float_list=tf.train.FloatList(value=[image_width])),
                'height': tf.train.Feature(float_list=tf.train.FloatList(value=[image_height])),
                'xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmin)),
                'ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymin)),
                'xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmax)),
                'ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymax)),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=obj_label)),
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
            print('第{}张图片写入完毕'.format(i))

if __name__=='__main__':
    # Convert every annotation matching the glob below, together with its
    # image from the JPEGImages folder, into a single TFRecord file.
    write_to_tfrecord(
        all_xml_path=glob.glob('VOCdevkit/VOC2012/Annotations/*.xml'),
        tfrecord_path='voc_2012.tfrecords',
        voc_img_path='VOCdevkit/VOC2012/JPEGImages',
    )

3 读取tfrecords文件

import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

def parse_example(example_string):
    """Decode one serialized example into image, size and box tensors.

    The feature schema is read from the module-level ``feature_description``
    name, which must be defined before this function runs.

    Args:
        example_string: One serialized example, as read from the TFRecord
            dataset.

    Returns:
        A tuple ``(image, width, height, boxes, labels)``. ``boxes`` stacks
        the dense ``xmin``, ``ymin``, ``xmax`` and ``ymax`` features along
        axis 1, and ``labels`` is the dense ``label`` feature.
    """
    parsed = tf.io.parse_single_example(example_string, feature_description)
    image_data = tf.io.decode_jpeg(parsed['image'])

    # Densify each coordinate feature, then join them so that (assuming each
    # one is a 1-D vector) every row of `boxes` describes a single object.
    corner_keys = ('xmin', 'ymin', 'xmax', 'ymax')
    corners = [tf.sparse.to_dense(parsed[key]) for key in corner_keys]
    boxes = tf.stack(corners, axis=1)

    boxes_category = tf.sparse.to_dense(parsed['label'])
    return image_data, parsed['width'], parsed['height'], boxes, boxes_category

if __name__ == '__main__':
    # Schema used to decode each record. It has to be a module-level name
    # because parse_example reads `feature_description` as a global.
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'width': tf.io.FixedLenFeature([], tf.float32),
        'height': tf.io.FixedLenFeature([], tf.float32),
        'xmin': tf.io.VarLenFeature(tf.float32),
        'ymin': tf.io.VarLenFeature(tf.float32),
        'xmax': tf.io.VarLenFeature(tf.float32),
        'ymax': tf.io.VarLenFeature(tf.float32),
        'label': tf.io.VarLenFeature(tf.int64),
    }
    raw_datasets = tf.data.TFRecordDataset('voc_2012.tfrecords')
    raw_datasets = raw_datasets.map(parse_example)
    print(raw_datasets)

    # Show the first 12 images in a 3x4 grid with their boxes drawn in red.
    plt.figure(figsize=(15, 10))
    for i, (image, width, height, boxes, boxes_category) in enumerate(raw_datasets.take(12)):
        plt.subplot(3, 4, i + 1)
        plt.imshow(image.numpy())
        ax = plt.gca()
        # Convert the box tensor to NumPy once so matplotlib receives plain
        # numbers rather than tensor scalars.
        for x_min, y_min, x_max, y_max in boxes.numpy():
            rect = Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, color='r', fill=False)
            ax.add_patch(rect)
    plt.show()

在这里插入图片描述

  • 5
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
将VOC2007数据集转换为TFRecord文件需要以下步骤:

1. 下载VOC2007数据集并解压缩。
2. 安装TensorFlow和Pillow库。
3. 编写脚本将VOC2007数据集转换为TFRecord文件。

以下是一个简单的Python脚本示例,可以将VOC2007数据集转换为TFRecord文件:

```python
import tensorflow as tf
import os
import io
import xml.etree.ElementTree as ET
from PIL import Image

def create_tf_example(example):
    # 读取图像文件
    img_path = os.path.join('VOCdevkit/VOC2007/JPEGImages', example['filename'])
    with tf.io.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    # 读取标注文件
    xml_path = os.path.join('VOCdevkit/VOC2007/Annotations', example['filename'].replace('.jpg', '.xml'))
    with tf.io.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
    xml = ET.fromstring(xml_str)

    # 解析标注文件
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for obj in xml.findall('object'):
        class_name = obj.find('name').text
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map[class_name])
        bbox = obj.find('bndbox')
        xmins.append(float(bbox.find('xmin').text) / width)
        ymins.append(float(bbox.find('ymin').text) / height)
        xmaxs.append(float(bbox.find('xmax').text) / width)
        ymaxs.append(float(bbox.find('ymax').text) / height)

    # 构造TFRecord Example
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[example['filename'].encode('utf8')])),
        'image/source_id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[example['filename'].encode('utf8')])),
        'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_jpg])),
        'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=['jpeg'.encode('utf8')])),
        'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmins)),
        'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmaxs)),
        'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymins)),
        'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymaxs)),
        'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
        'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=classes)),
    }))
    return tf_example

# 将VOC2007数据集转换为TFRecord文件
def create_tf_record(output_file):
    examples = [...]  # 从VOC2007数据集读取实例
    writer = tf.io.TFRecordWriter(output_file)
    for example in examples:
        tf_example = create_tf_example(example)
        writer.write(tf_example.SerializeToString())
    writer.close()

label_map = {...}  # 标签映射
output_file = 'voc2007_train.tfrecord'
create_tf_record(output_file)
```

其中`create_tf_example`函数将一个VOC2007样本转换为TFRecord Example,`create_tf_record`函数将整个VOC2007数据集转换为TFRecord文件。在这个例子中,我们假设VOC2007数据集已经被解压缩到`VOCdevkit/VOC2007`目录下,标签映射已经定义为`label_map`变量。你需要根据自己的实际情况修改这些变量。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值