在上一篇博客《使用Tensorflow制作球鞋识别模型(一)——数据预处理》中,已经介绍了TFRecord以及如何制作自己的图片数据集。接下来要对输入的图片进行处理:首先实现get_files函数,把指定大小的图片按类别整理成样本(sample)与标签(label)列表,作为神经网络的输入;同时,为了方便网络的训练,再通过get_batch函数对输入数据进行batch处理。
生成图片路径和标签的List
首先导入所需要的包
import os
import math
import numpy as np
import tensorflow as tf
图片路径和创建列表
# Directory that holds one sub-directory of images per sneaker class.
train_dir = 'D:/PyCharm/PycharmProjects/AJ_Recognition/data_prepare/pic/inputdata'

# One (image paths, labels) pair of empty lists per class.
AJ1, label_AJ1 = [], []
AJ4, label_AJ4 = [], []
AJ11, label_AJ11 = [], []
AJ12, label_AJ12 = [], []
获取路径下所有的图片路径名,存放到对应的列表中,同时贴上标签,存放到label列表中。
append() 方法用于在列表末尾添加新的对象。
def get_files(file_dir, ratio):
    """Collect image paths and labels, shuffle them, and split them in two.

    The sub-directories ``AJ1``, ``AJ4``, ``AJ11`` and ``AJ12`` of
    ``file_dir`` are listed with ``os.listdir``; their entries get the
    labels 0, 1, 2 and 3 respectively.

    Args:
        file_dir: Directory that contains the four class sub-directories.
        ratio: Fraction of all samples held out for validation/testing.
            The held-out count is ``ceil(n_sample * ratio)``.

    Returns:
        A tuple ``(tra_images, tra_labels, val_images, val_labels)``:
        two lists of image paths and two lists of ``int`` labels. Every
        sample ends up in exactly one of the two splits.
    """
    # (sub-directory name, integer label) for every class.
    classes = (('AJ1', 0), ('AJ4', 1), ('AJ11', 2), ('AJ12', 3))

    # Local lists: module-level accumulators would make a second call
    # return every sample twice.
    class_paths = []
    class_labels = []
    for class_name, class_label in classes:
        class_dir = file_dir + '/' + class_name
        paths = [class_dir + '/' + file for file in os.listdir(class_dir)]
        class_paths.append(paths)
        class_labels.append([class_label] * len(paths))

    # API notes:
    #   np.hstack(): stacks arrays horizontally (column-wise) into a new array
    #   np.array(): array constructor
    #   temp.transpose(): transposes the array
    #   np.random.shuffle(): shuffles the order (in place)
    #   math.ceil(x): returns the smallest integer >= x

    # Merge all classes into one list of paths and one list of labels.
    image_list = np.hstack(class_paths)
    label_list = np.hstack(class_labels)

    # Pair each path with its label as a row so that one shuffle keeps
    # them aligned.
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    # Convert the shuffled columns back to lists.
    all_image_list = list(temp[:, 0])
    all_label_list = list(temp[:, 1])

    # Split into a training part and a validation part.
    n_sample = len(all_label_list)
    # Number of validation samples.
    n_val = int(math.ceil(n_sample * ratio))
    # Number of training samples.
    n_train = n_sample - n_val

    tra_images = all_image_list[:n_train]
    # Labels sit in the same array as the string paths, so they are parsed
    # back into integers here.
    tra_labels = [int(float(i)) for i in all_label_list[:n_train]]
    # Slice to the end: a ``-1`` stop would drop the last sample.
    val_images = all_image_list[n_train:]
    val_labels = [int(float(i)) for i in all_label_list[n_train:]]
    return tra_images, tra_labels, val_images, val_labels
生成Batch
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build batched image and label tensors from path and label lists.

    Args:
        image: Image file paths; cast to ``tf.string``.
        label: Labels aligned with ``image``; cast to ``tf.int32``.
        image_W: Target image width after crop/pad.
        image_H: Target image height after crop/pad.
        batch_size: Number of samples per batch.
        capacity: ``capacity`` argument forwarded to ``tf.train.batch``.

    Returns:
        ``(image_batch, label_batch)``: the image batch cast to
        ``tf.float32`` and the label batch reshaped to ``[batch_size]``.
    """
    # Convert the Python inputs to tensors of the expected types.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Input queue that yields one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])  # read img from a queue

    # Decode the image; different image formats must not be mixed.
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Preprocessing: crop or pad to the target size, then standardize.
    # The API takes (target_height, target_width), so the height goes first.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    image = tf.image.per_image_standardization(image)

    # Assemble the batch; num_threads matches the complete listing.
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=32,
                                              capacity=capacity)

    # Reshape the labels to a vector of length batch_size.
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
全部代码
import os
import math
import numpy as np
import tensorflow as tf
# Directory that holds one sub-directory of images per sneaker class.
train_dir = 'D:/PyCharm/PycharmProjects/AJ_Recognition/data_prepare/pic/inputdata'

# One (image paths, labels) pair of empty lists per class.
AJ1, label_AJ1 = [], []
AJ4, label_AJ4 = [], []
AJ11, label_AJ11 = [], []
AJ12, label_AJ12 = [], []
def get_files(file_dir, ratio):
    """Collect image paths and labels, shuffle them, and split them in two.

    The sub-directories ``AJ1``, ``AJ4``, ``AJ11`` and ``AJ12`` of
    ``file_dir`` are listed with ``os.listdir``; their entries get the
    labels 0, 1, 2 and 3 respectively.

    Args:
        file_dir: Directory that contains the four class sub-directories.
        ratio: Fraction of all samples held out for validation/testing.
            The held-out count is ``ceil(n_sample * ratio)``.

    Returns:
        A tuple ``(tra_images, tra_labels, val_images, val_labels)``:
        two lists of image paths and two lists of ``int`` labels. Every
        sample ends up in exactly one of the two splits.
    """
    # (sub-directory name, integer label) for every class.
    classes = (('AJ1', 0), ('AJ4', 1), ('AJ11', 2), ('AJ12', 3))

    # Local lists: module-level accumulators would make a second call
    # return every sample twice.
    class_paths = []
    class_labels = []
    for class_name, class_label in classes:
        class_dir = file_dir + '/' + class_name
        paths = [class_dir + '/' + file for file in os.listdir(class_dir)]
        class_paths.append(paths)
        class_labels.append([class_label] * len(paths))

    # Merge all classes, then pair each path with its label as a row so
    # that one shuffle keeps them aligned.
    image_list = np.hstack(class_paths)
    label_list = np.hstack(class_labels)
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    all_image_list = list(temp[:, 0])
    all_label_list = list(temp[:, 1])

    # The validation share is rounded up; the rest is used for training.
    n_sample = len(all_label_list)
    n_val = int(math.ceil(n_sample * ratio))
    n_train = n_sample - n_val

    tra_images = all_image_list[:n_train]
    # Labels sit in the same array as the string paths, so they are parsed
    # back into integers here.
    tra_labels = [int(float(i)) for i in all_label_list[:n_train]]
    # Slice to the end: a ``-1`` stop would drop the last sample.
    val_images = all_image_list[n_train:]
    val_labels = [int(float(i)) for i in all_label_list[n_train:]]
    return tra_images, tra_labels, val_images, val_labels
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build batched image and label tensors from path and label lists.

    Args:
        image: Image file paths; cast to ``tf.string``.
        label: Labels aligned with ``image``; cast to ``tf.int32``.
        image_W: Target image width after crop/pad.
        image_H: Target image height after crop/pad.
        batch_size: Number of samples per batch.
        capacity: ``capacity`` argument forwarded to ``tf.train.batch``.

    Returns:
        ``(image_batch, label_batch)``: the image batch cast to
        ``tf.float32`` and the label batch reshaped to ``[batch_size]``.
    """
    # Convert the Python inputs to tensors of the expected types.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Input queue that yields one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # Decode as a 3-channel JPEG.
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # The API takes (target_height, target_width), so the height goes first.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=32,
                                              capacity=capacity)

    # Reshape the labels to a vector of length batch_size.
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
项目代码
GitHub地址:https://github.com/WellTung666/Tensorflow/tree/master/AJ_Recognition