最近在做语义分割的时候用到了TensorFlow框架,TensorFlow数据集输入的格式需要为.TFRecord格式,而手上只有.JSON格式的数据集,经过多番查找和尝试后最终成功转换,在此做个记录。
.JSON转VOC
原始的数据集:每张图片对应一个.json文件。
除此之外还有个labels.txt文件,里面写着类别名。
首先是使用下面代码将.json格式文件转换成voc格式。
labelme2voc.py
#!/usr/bin/env python
# Usage: python labelme2voc.py ./hair-lips-json-dataset/ output/ --labels=labels.txt
from __future__ import print_function
import argparse
import glob
import json
import os
import os.path as osp
import sys
import numpy as np
import PIL.Image
import labelme
def main():
    """Convert a folder of labelme ``.json`` annotations to a VOC-style dataset.

    Command-line arguments:
        input_dir: folder holding the ``*.json`` annotation files and, for
            each of them, an image named ``<json basename>.jpg``.
        output_dir: dataset folder to create. The script refuses to run
            (exit status 1) when it already exists.
        --labels: text file with one class name per line. The first line
            must be ``__ignore__`` (id -1) and the second ``_background_``
            (id 0); the following lines get ids 1, 2, ...

    For every annotation the image is re-saved under ``JPEGImages``, the
    label array under ``SegmentationClass`` (``.npy``), the label PNG under
    ``SegmentationClassPNG`` and the ``labelme.utils.draw_label`` rendering
    under ``SegmentationClassVisualization``. The class names (without
    ``__ignore__``) are written to ``class_names.txt``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('input_dir', help='input annotated directory')
    parser.add_argument('output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print('Output directory already exists:', args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    for sub_dir in (
        'JPEGImages',
        'SegmentationClass',
        'SegmentationClassPNG',
        'SegmentationClassVisualization',
    ):
        os.makedirs(osp.join(args.output_dir, sub_dir))
    print('Creating dataset:', args.output_dir)

    class_names = []
    class_name_to_id = {}
    # Read the labels inside a context manager so the handle is closed even
    # when one of the sanity checks below fails.
    with open(args.labels) as labels_file:
        for i, line in enumerate(labels_file):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            class_name_to_id[class_name] = class_id
            if class_id == -1:
                assert class_name == '__ignore__'
                # The ignore label keeps its id but is not a listed class.
                continue
            elif class_id == 0:
                assert class_name == '_background_'
            class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)

    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.write('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    colormap = labelme.utils.label_colormap(255)

    print(osp.join(args.input_dir, '*.json'))
    for label_file in glob.glob(osp.join(args.input_dir, '*.json')):
        print('Generating dataset from:', label_file)
        base = osp.splitext(osp.basename(label_file))[0]
        print(base)

        out_img_file = osp.join(
            args.output_dir, 'JPEGImages', base + '.jpg')
        out_lbl_file = osp.join(
            args.output_dir, 'SegmentationClass', base + '.npy')
        out_png_file = osp.join(
            args.output_dir, 'SegmentationClassPNG', base + '.png')
        out_viz_file = osp.join(
            args.output_dir,
            'SegmentationClassVisualization',
            base + '.jpg',
        )

        # Keep the annotation file open only while it is being parsed.
        with open(label_file) as f:
            data = json.load(f)

        # The image is looked up as "<base>.jpg" inside input_dir; the
        # 'imagePath' entry stored in the JSON is deliberately not used.
        img_file = os.path.join(args.input_dir, base + '.jpg')
        img = np.asarray(PIL.Image.open(img_file))
        PIL.Image.fromarray(img).save(out_img_file)

        lbl = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=data['shapes'],
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        np.save(out_lbl_file, lbl)

        viz = labelme.utils.draw_label(
            lbl, img, class_names, colormap=colormap)
        PIL.Image.fromarray(viz).save(out_viz_file)


if __name__ == '__main__':
    main()
转换后得到文件如下图:
其中JPEGImages中放着原始图片:
SegmentationClass中放着.npy格式文件:
SegmentationClassPNG:
SegmentationClassVisualization:
class_names.txt:
数据集划分
转换成voc后,将数据集分成训练集和验证集,这里是90%训练集,10%验证集,路径、数量可以根据自己情况修改。
train_valid_txt.py
import os
import random
# Split the images under ./output/JPEGImages into a 90% training list and a
# 10% validation list, written as train.txt / val.txt in the working
# directory (one image basename per line).
root_path = os.getcwd()
print(root_path)
images_dir = root_path + '/output/JPEGImages/'
images_list = os.listdir(images_dir)
print(images_list)
print('----------------------------------------------')
random.shuffle(images_list)  # shuffle so the split is random
print(images_list)
total_num = len(images_list)
print("总数量:", total_num)
train_num = int(total_num * 0.9)
print("训练集数量:", train_num)
print("验证集数量:", total_num - train_num)

# os.path.splitext removes only the final extension, so a basename that
# itself contains dots (e.g. "img.0001.jpg") is written out intact.
with open(root_path + '/train.txt', 'w') as list_file_1:
    for item1 in images_list[:train_num]:
        image_id = os.path.splitext(item1)[0]
        list_file_1.write('%s\n' % (image_id))

with open(root_path + '/val.txt', 'w') as list_file_2:
    for item2 in images_list[train_num:]:
        image_id = os.path.splitext(item2)[0]
        list_file_2.write('%s\n' % (image_id))
生成train.txt和val.txt两个文件,文件中有数据集的名字。
生成TFRecord数据集
生成TFRecord格式数据集:路径、文件夹需要根据自己实际情况修改。注意:上一步生成的train.txt和val.txt位于当前工作目录下,需要先放到--list_folder指定的文件夹(这里是./txt)中,脚本会读取该文件夹下所有的.txt文件。
python ./build_voc2012_data.py \
--image_folder="./output/JPEGImages" \
--semantic_segmentation_folder="./output/SegmentationClassPNG" \
--list_folder="./txt" \
--image_format="jpg" \
--output_dir="./tfrecord"
build_voc2012_data.py
# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts PASCAL VOC 2012 data to TFRecord file format with Example protos.
PASCAL VOC 2012 dataset is expected to have the following directory structure:
+ pascal_voc_seg
- build_data.py
- build_voc2012_data.py (current working directory).
+ VOCdevkit
+ VOC2012
+ JPEGImages
+ SegmentationClass
+ ImageSets
+ Segmentation
+ tfrecord
Image folder:
./VOCdevkit/VOC2012/JPEGImages
Semantic segmentation annotations:
./VOCdevkit/VOC2012/SegmentationClass
list folder:
./VOCdevkit/VOC2012/ImageSets/Segmentation
This script converts data into sharded data files and save at tfrecord folder.
The Example proto contains the following fields:
image/encoded: encoded image content.
image/filename: image filename.
image/format: image file format.
image/height: image height.
image/width: image width.
image/channels: image channels.
image/segmentation/class/encoded: encoded semantic segmentation content.
image/segmentation/class/format: semantic segmentation file format.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os.path
import sys
import build_data
from six.moves import range
import tensorflow as tf
"""
python ./build_voc2012_data.py \
--image_folder="./output/JPEGImages" \
--semantic_segmentation_folder="./output/SegmentationClassPNG" \
--list_folder="./txt" \
--image_format="jpg" \
--output_dir="./tfrecord"
"""
# Command-line flags of this script. FLAGS.image_format and
# FLAGS.label_format are read further down but are not defined here;
# presumably the imported build_data module registers them -- TODO confirm.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    name='image_folder',
    default='./VOCdevkit/VOC2012/JPEGImages',
    help='Folder containing images.')

tf.app.flags.DEFINE_string(
    name='semantic_segmentation_folder',
    default='./VOCdevkit/VOC2012/SegmentationClassRaw',
    help='Folder containing semantic segmentation annotations.')

tf.app.flags.DEFINE_string(
    name='list_folder',
    default='./VOCdevkit/VOC2012/ImageSets/Segmentation',
    help='Folder containing lists for training and validation')

tf.app.flags.DEFINE_string(
    name='output_dir',
    default='./tfrecord',
    help='Path to save converted SSTable of TensorFlow examples.')

# Number of .tfrecord files each dataset split is divided into.
_NUM_SHARDS = 4
def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    The split is written as ``_NUM_SHARDS`` files named
    ``<split>-<shard>-of-<total>.tfrecord`` inside ``FLAGS.output_dir``.

    Args:
      dataset_split: Path to the list file of the split (e.g. ``train.txt``),
        holding one image basename per line. Blank lines are ignored. The
        file name minus its last four characters is used as the split name.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)

    # Close the list file once read. Strip '\r' as well as '\n' so lists
    # saved with Windows line endings work, and drop empty lines, which would
    # otherwise turn into a lookup of a file named just ".<format>".
    with open(dataset_split, 'r') as list_file:
        stripped_lines = (line.rstrip('\r\n') for line in list_file)
        filenames = [name for name in stripped_lines if name]

    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / _NUM_SHARDS))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                    i + 1, num_images, shard_id))
                sys.stdout.flush()

                # Read the image.
                image_filename = os.path.join(
                    FLAGS.image_folder,
                    filenames[i] + '.' + FLAGS.image_format)
                with tf.gfile.GFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Read the semantic segmentation annotation.
                seg_filename = os.path.join(
                    FLAGS.semantic_segmentation_folder,
                    filenames[i] + '.' + FLAGS.label_format)
                with tf.gfile.GFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)

                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')

                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, filenames[i], height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()
def main(unused_argv):
    """Converts every ``*.txt`` split list found in ``FLAGS.list_folder``.

    Args:
      unused_argv: Positional command-line arguments passed in by
        ``tf.app.run``; not used.
    """
    # Make sure the destination folder exists before any shard is opened for
    # writing, so the script also works with a fresh --output_dir.
    tf.gfile.MakeDirs(FLAGS.output_dir)

    dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
    for dataset_split in dataset_splits:
        _convert_dataset(dataset_split)


if __name__ == '__main__':
    tf.app.run()
最终生成的TFRecord格式数据集:
以上就是整个JSON格式数据集转TFRecord格式的过程,本方法不一定适合所有的数据集,仅供参考;代码中可能有些地方需要根据自己实际情况修改但是未标明,如有更好的方法欢迎评论交流。