python小工具--tfrecords文件的制作工具

最新推荐文章于 2024-04-17 14:12:11 发布

oyejiji

最新推荐文章于 2024-04-17 14:12:11 发布

阅读量1.4k

点赞数 1

分类专栏：自制的python小工具

本文链接：https://blog.csdn.net/m0_37041325/article/details/74891322

版权

自制的python小工具专栏收录该内容

4 篇文章 0 订阅

订阅专栏

最近一直在捣弄TensorFlow，其中有关数据的读取有很多种方式。因为嫌麻烦，我就把读取方式统一为TensorFlow自己的数据格式，也就是tfrecords格式，所以自己写了个将数据和标签转化为tfrecords格式的python程序。

"""
tfrecord文件的生成程序，能实现将分好类的图像数据
打乱写入tfrecords类型文件中，并自动将数据集分成测试数据
和训练数据
@author:liao
"""
import tensorflow as tf
from PIL import Image
import os
import random
import shutil



####################### User settings: this is the part to edit
# Fraction of every class folder that is put into the TEST set
# (the remainder becomes training data).
test_number_ratio = 0.6
cwd = os.getcwd()
# Absolute path of the folder holding the class sub-folders to convert.
path = os.sep.join((cwd, 'tfrecoder-test'))
# Absolute path of the folder that receives the generated tfrecords files.
tf_records_dir = os.sep.join((path, 'tfrecord_file'))
# File name (without extension) of the generated TRAINING tfrecords file.
tf_train_name = 'train_emotion_image_data'
# File name (without extension) of the generated TEST tfrecords file.
tf_test_name = 'test_emotion_image_data'
########################


# Full path of the training tfrecords file.
tf_train_records_path = os.sep.join((tf_records_dir, tf_train_name + '.tfrecords'))
# Full path of the test tfrecords file.
tf_test_records_path = os.sep.join((tf_records_dir, tf_test_name + '.tfrecords'))


# The output folder lives inside the data folder, so a copy left over from a
# previous run is removed (recursively) before the class folders are listed.
stale_output_exists = os.path.exists(tf_records_dir)
if stale_output_exists:
    shutil.rmtree(tf_records_dir)


# Names of the class folders found directly under `path`.
# Only real directories are kept: a stray regular file in the data folder
# would otherwise be treated as a class and make the later os.listdir() call
# on it fail. os.listdir() returns entries in arbitrary order, so the names
# are sorted to get a reproducible listing.
classname = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(path + os.sep + entry)
)
# Absolute path of every class folder, in the same order as `classname`.
classname_path = [path + os.sep + entry for entry in classname]

print(classname_path)

train_file_path = []  # absolute path of every training image
test_file_path = []   # absolute path of every test image

for class_dir in classname_path:
    entries = os.listdir(class_dir)  # file names inside this class folder
    # The leading `test_number_ratio` share of the listing becomes test data,
    # everything after the cut becomes training data.
    split_at = int(float(len(entries)) * test_number_ratio)
    train_file_path.extend(class_dir + os.sep + name for name in entries[split_at:])
    test_file_path.extend(class_dir + os.sep + name for name in entries[:split_at])

# Only the training paths are shuffled, so that the records in the training
# tfrecords file are not grouped by class.
random.shuffle(train_file_path)


# Create the folder that will hold the generated tfrecords files.
os.mkdir(tf_records_dir)

# tf.python_io only exists in TensorFlow 1.x; newer releases expose the same
# writer class under tf.io.
_record_io = tf.python_io if hasattr(tf, "python_io") else tf.io


def _label_from_path(img_path):
    """Return the integer class label encoded in the image's parent folder.

    The label is the run of digits at the end of the folder name, so
    ``class3`` gives 3 and ``class10`` gives 10. The folder name is taken with
    ``os.path`` so this works with either path separator.

    Raises:
        ValueError: if the parent folder name does not end with a digit.
    """
    class_dir = os.path.basename(os.path.dirname(img_path))
    digits = class_dir[len(class_dir.rstrip("0123456789")):]
    if not digits:
        raise ValueError(
            "cannot read a class label from folder name %r (%s)"
            % (class_dir, img_path)
        )
    return int(digits)


def _write_records(image_paths, records_path, split_name):
    """Serialize every image in ``image_paths`` into one tfrecords file.

    Each record holds two features: ``label`` (int64, from the parent folder
    name) and ``img`` (the bytes returned by ``Image.tobytes()``). The image
    size and mode are only printed, not stored in the record.

    Args:
        image_paths: paths of the image files to convert, in write order.
        records_path: path of the tfrecords file to create.
        split_name: name shown in the progress output ("train" or "test").
    """
    # The context managers close the writer and each image file even when an
    # image cannot be read or a label cannot be parsed.
    with _record_io.TFRecordWriter(records_path) as writer:
        for image_number, img_path in enumerate(image_paths, start=1):
            print(img_path)
            label = _label_from_path(img_path)
            print("class %s" % (label))
            with Image.open(img_path) as img:
                print(img.size, img.mode)
                img_raw = img.tobytes()
            print("pictures of %s:%s" % (split_name, image_number))
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
                "img": tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
            writer.write(example.SerializeToString())


# Generate the training tfrecords file.
_write_records(train_file_path, tf_train_records_path, "train")

# Generate the test tfrecords file.
_write_records(test_file_path, tf_test_records_path, "test")

# Final report: the split ratio and how many images ended up in each set.
train_count = len(train_file_path)
test_count = len(test_file_path)

print("\n")
print("test_number_ratio=%s" % test_number_ratio)
print("total_pictures=%s" % (train_count + test_count))
print("pictures of train:%s have converted" % train_count)
print("pictures of test: %s have converted" % test_count)

这个程序主要实现了将分好类别的图像数据转换为tfrecords格式的数据，方便使用TensorFlow训练时读取数据。得到的tfrecords数据分为train_emotion_image_data.tfrecords和test_emotion_image_data.tfrecords，显然train_emotion_image_data.tfrecords用于训练，而test_emotion_image_data.tfrecords用于测试。

修改的话一般只需要修改程序里指定的某些部分就行了，具体看程序。

有关tfrecords格式的相关知识请参考这里http://wiki.jikexueyuan.com/project/tensorflow-zh/how_tos/reading_data.html#AUTOGENERATED-reading-from-files

oyejiji

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
1
评论
python小工具--tfrecords文件的制作工具

最近一直在捣弄TensorFlow，其中有关数据的读取有很多种方式。因为嫌麻烦，自己就把读取的方式统一为用TensorFlow自己的数据格式也就是tfrecords的格式，所以我自己弄写了个将数据和标签转化为tfrecords格式的python程序。"""tfrecord文件的生成程序，能实现将分好类的图像数据打乱写入tfrecords类型文件中，并自动将数据集分成测试数据和训练数据@au
复制链接

扫一扫