机器学习.数据处理.将图片数据转化为TFRecord

18 篇文章 1 订阅
12 篇文章 0 订阅

数据集的目录结构如下:

   ./data/cat_and_dog/train_r/

      |--cat/

      |--dog/

获取./data/cat_and_dog/train_r/目录下的所有文件,并按所在文件夹标注:cat=0,dog=1

import cv2
import os
import numpy as np
import tensorflow as tf
import io


def get_file(file_dir):
    """Collect image paths under ``file_dir`` and label them by class folder.

    ``file_dir`` is expected to contain one sub-folder per class. Every file
    found below a sub-folder is labelled from the name of that top-level
    sub-folder: ``cat`` -> 0, anything else -> 1. Files lying directly in
    ``file_dir`` belong to no class folder and are skipped.

    Args:
        file_dir: Root directory of the data set.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length, shuffled with
        the same random permutation: file paths (str) and integer labels.
    """
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        relative = os.path.relpath(root, file_dir)
        if relative == os.curdir:
            # Files directly in the root have no class folder to label from.
            continue
        # Use the platform separator so labelling works on POSIX and Windows;
        # the first component is the class folder even for nested folders.
        class_name = relative.split(os.sep)[0]
        label = 0 if class_name == 'cat' else 1
        # Path and label are recorded together so they can never misalign.
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle both lists with one shared permutation.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list

if __name__ == "__main__":
    # Script entry point: build the shuffled path/label lists for the
    # resized cat-and-dog training set. The results are not used further;
    # uncomment the prints below to inspect them.
    image_list, label_list = get_file("./data/cat_and_dog/train_r")
    #print(image_list)
    #print(label_list)

上面的代码段中,首先读取数据集中各个文件的位置,之后根据文件夹名称的不同,将不同文件夹中的图片标签设置为0或者1;如果有更多分类,可以依据这个格式设置更多的标签类。之后使用创建的数组保存所读取的文件位置和标签,再用Numpy将二者组合成一个矩阵并打乱顺序(文件位置与标签始终保持一一对应),最后将打乱后的文件位置列表和标签列表返回。

  在获取图片数据文件位置和图片标签之后即可通过相应的程序对其进行读取,并生成专用的TFRecord的数据集。

import cv2
import os
import numpy as np
import tensorflow as tf


def resize_img(in_dir, out_dir=None, dim=(227, 227)):
    """Resize every image in ``in_dir`` to ``dim`` and save it to ``out_dir``.

    Reference: "TensorFlow深度学习应用实践" (TensorFlow Deep Learning in
    Practice), p. 171.

    Files that cannot be decoded, resized or written are reported on stdout
    and skipped; processing continues with the next file.

    Args:
        in_dir: Directory containing the source images.
        out_dir: Directory receiving the resized images (created if missing).
            ``None`` means "resize in place", i.e. overwrite files in
            ``in_dir``.
        dim: Target size passed to ``cv2.resize``.
    """
    if out_dir is None:
        # The default used to crash inside os.makedirs(None).
        out_dir = in_dir
    os.makedirs(out_dir, exist_ok=True)

    for file in os.listdir(in_dir):
        filepath = os.path.join(in_dir, file)
        # cv2.imread signals failure by returning None, not by raising.
        image = cv2.imread(filepath)
        if image is None:
            print("【图片无法转换】:", filepath)
            continue
        try:
            resized = cv2.resize(image, dim)
            # cv2.imwrite returns False when the file could not be written.
            written = cv2.imwrite(os.path.join(out_dir, file), resized)
        except cv2.error:
            written = False
        if not written:
            print("【图片无法转换】:", filepath)


def resize_img_test():
    """Resize the raw dog images into the ``train_r`` data set folder."""
    source_dir = "./data/cat_and_dog/train/dog/"
    target_dir = "./data/cat_and_dog/train_r/dog/"
    resize_img(source_dir, target_dir)


def get_file(file_dir):
    """Collect image paths under ``file_dir`` and label them by class folder.

    ``file_dir`` is expected to contain one sub-folder per class. Every file
    found below a sub-folder is labelled from the name of that top-level
    sub-folder: ``cat`` -> 0, anything else -> 1. Files lying directly in
    ``file_dir`` belong to no class folder and are skipped.

    Args:
        file_dir: Root directory of the data set.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length, shuffled with
        the same random permutation: file paths (str) and integer labels.
    """
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        relative = os.path.relpath(root, file_dir)
        if relative == os.curdir:
            # Files directly in the root have no class folder to label from.
            continue
        # Use the platform separator so labelling works on POSIX and Windows;
        # the first component is the class folder even for nested folders.
        class_name = relative.split(os.sep)[0]
        label = 0 if class_name == 'cat' else 1
        # Path and label are recorded together so they can never misalign.
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle both lists with one shared permutation.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list


def get_file_test():
    """Run ``get_file`` on the resized training set as a smoke check."""
    dataset_root = "./data/cat_and_dog/train_r"
    paths, labels = get_file(dataset_root)


def convert_to_tfrecord(images_list, labels_list, save_dir, name):
    """Serialize images and labels into a single TFRecord file.

    Each record holds two features: ``label`` (int64) and ``image_raw``
    (the decoded image array as raw bytes). Images that cannot be read are
    reported on stdout and skipped.

    Args:
        images_list: Paths of the image files to store.
        labels_list: Integer labels, one per entry of ``images_list``.
        save_dir: Directory in which the output file is created.
        name: Base name of the output file; ``.tfrecodes`` is appended.
    """
    # NOTE: the '.tfrecodes' suffix is kept as is because the reader script
    # opens the file under exactly this name.
    filename = os.path.join(save_dir, name + '.tfrecodes')
    n_samples = len(labels_list)
    print("\nTransform start......(%d in total)" % n_samples)

    # The context manager closes the writer even if a record fails.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for image_path, raw_label in zip(images_list, labels_list):
            # cv2.imread returns None for unreadable files instead of raising,
            # so test the result rather than catching IOError.
            image = cv2.imread(image_path)
            if image is None:
                print("Could not read:", image_path)
                continue
            image_raw = image.tobytes()
            label = int(raw_label)
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'label': tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[label])),
                    # Must match the key the reader parses.
                    'image_raw': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[image_raw])),
                }))
            writer.write(example.SerializeToString())
    print("Transform done!")


def convert_to_tfrecord_test():
    """Build the TFRecord file for the resized cat-and-dog training set."""
    dataset_root = "./data/cat_and_dog/train_r"
    output_dir = "./data/cat_and_dog/"
    record_name = "cat_and_dog_train_r"
    paths, labels = get_file(dataset_root)
    convert_to_tfrecord(paths, labels, output_dir, record_name)

if __name__ == "__main__":
    # Script entry point: collect the labelled image paths and write them
    # to a TFRecord file.
    convert_to_tfrecord_test()

当生成完数据集后,在神经网络使用数据集进行训练时,需要一个方法将数据从数据集中取出,下面的代码段完成了数据读取的功能。

import tensorflow as tf


def read_and_decode(tfrecords_file, batch_size, image_shape=(227, 227, 3)):
    """Build graph ops that read shuffled batches from a TFRecord file.

    Uses the TensorFlow 1.x queue-based input pipeline. Each record is
    expected to carry a ``label`` (int64) and an ``image_raw`` (raw uint8
    bytes) feature.

    Args:
        tfrecords_file: Path of the TFRecord file to read.
        batch_size: Number of examples per batch.
        image_shape: Shape every decoded image is reshaped to, as
            ``(height, width, channels)``. Must match the size of the
            stored images; defaults to ``(227, 227, 3)``.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors: the uint8 images
        and the int32 labels reshaped to ``[batch_size]``.
    """
    filename_queue = tf.train.string_input_producer([tfrecords_file])

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    img_features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
        })

    # The record stores only raw bytes, so the shape is restored here.
    image = tf.decode_raw(img_features['image_raw'], tf.uint8)
    image = tf.reshape(image, list(image_shape))
    label = tf.cast(img_features['label'], tf.int32)

    image_batch, label_batch = tf.train.shuffle_batch([image, label],
                                                      batch_size=batch_size,
                                                      min_after_dequeue=100,
                                                      num_threads=64,
                                                      capacity=200)

    return image_batch, tf.reshape(label_batch, [batch_size])


if __name__ == "__main__":
    # Script entry point: only builds the batch tensors for the training
    # TFRecord file; nothing here evaluates them.
    image_batch, label_batch= read_and_decode("./data/cat_and_dog/cat_and_dog_train_r.tfrecodes", batch_size=100)


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值