使用tf.data读取tfrecords数据集3

最新推荐文章于 2023-12-20 18:19:19 发布

love萌萌loli

最新推荐文章于 2023-12-20 18:19:19 发布

阅读量473

点赞数

分类专栏： numpy tensorflow python

本文链接：https://blog.csdn.net/qwe2508/article/details/80558774

版权

python 同时被 3 个专栏收录

28 篇文章 1 订阅

订阅专栏

tensorflow

17 篇文章 0 订阅

订阅专栏

numpy

2 篇文章 0 订阅

订阅专栏

下面的代码用于制作数据集：从之前生成的txt文件中读取图片文件名和标签，然后制作成tfrecords文件

import os
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Input locations: the folder holding the dataset images, and the list file
# generated earlier (the author's "shuffle.txt").
cwd = r'/home/hehe/python/load_cifar10/datadir/'
filename=r'/home/hehe/python/dataset/list_val.txt'

# Sets TensorFlow's C++ log-level variable (presumably to quiet its
# informational/warning output - confirm against the TensorFlow docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Writer for the output TFRecord file.
writer = tf.python_io.TFRecordWriter('train2.tfrecords')


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

# Build one tf.train.Example per "<image name> <label>" line of the list file
# and append it to the TFRecord file opened by `writer`.
try:
    # Read the list through a context manager so the handle is always closed.
    with open(filename) as list_file:
        lines = list_file.read().splitlines()

    for ln in lines:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # on the tuple unpacking below.
        if not ln.strip():
            continue
        fname, lab = ln.split(' ')
        img_path = cwd + fname

        # Resize to 64x64 and dump the raw pixel bytes; the context manager
        # releases the underlying image file as soon as the bytes are taken.
        # NOTE: the image mode is not normalised here, so a non-RGB image
        # produces a record with a different byte length.
        with Image.open(img_path) as img:
            img_raw = img.resize((64, 64)).tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            "label": _int64_feature(int(lab)),
            "img_raw": _bytes_feature(img_raw),
        }))

        # Serialize the Example to a string and append it to the output file.
        writer.write(example.SerializeToString())
finally:
    # Close the writer even if an image fails to load, so the records
    # written so far are flushed to disk.
    writer.close()
print("finish to write data to tfrecord file!")

下面这种方案用来处理数据集中存在灰度图的情况。我认为这样应该已经处理好了，但还没有验证是否确实如此。

#这一步的作用是制作tfrecords数据集

import os
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Input locations: the folder holding the dataset images and the training
# list file with one "<image name> <label>" entry per line.
cwd =  r"/home/hh/jiao/"
filename=r'/home/hh/python/dataset/list_train.txt'

# Sets TensorFlow's C++ log-level variable (presumably to quiet its
# informational/warning output - confirm against the TensorFlow docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Writer for the output TFRecord file.
writer = tf.python_io.TFRecordWriter('train_224x224.tfrecords')


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

# Build one tf.train.Example per "<image name> <label>" line of the list file
# and append it to the TFRecord file opened by `writer`. Every image is stored
# as 224x224 RGB so that all records carry the same number of raw bytes.
#
# Fixes relative to the earlier version of this loop:
#   * non-RGB images were routed through np.asarray(...) followed by
#     ndarray.resize(...), which works in place and returns None, so the
#     following .tobytes() call failed; PIL's convert('RGB') is used instead;
#   * RGB images were resized to 128x128 while the fallback (and the output
#     file name) use 224x224; both paths now use 224x224;
#   * the counter of converted images was reset on every hit and always
#     printed 1; it is now a running total;
#   * the list file handle was never closed.
num = 0  # running count of images that had to be converted to RGB
try:
    # Read the list through a context manager so the handle is always closed.
    with open(filename) as list_file:
        lines = list_file.read().splitlines()

    for ln in lines:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # on the tuple unpacking below.
        if not ln.strip():
            continue
        fname, lab = ln.split(' ')
        img_path = cwd + fname

        with Image.open(img_path) as img:
            if img.mode == 'RGB':
                rgb_img = img
            else:
                # Grayscale, palette, RGBA, ... -> three 8-bit channels.
                num += 1
                print(num)
                rgb_img = img.convert('RGB')
            img_raw = rgb_img.resize((224, 224)).tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            "label": _int64_feature(int(lab)),
            "img_raw": _bytes_feature(img_raw),
        }))

        # Serialize the Example to a string and append it to the output file.
        writer.write(example.SerializeToString())
finally:
    # Close the writer even if an image fails to load, so the records
    # written so far are flushed to disk.
    writer.close()
print("finish to write data to tfrecord file!")