TensorFlow Study Notes 1 -- Loading Training Data with tf.data.Dataset (Kaggle cats-vs-dogs training set)

Since my skills are still limited, I'm writing these blog posts to record my own learning. This post uses TensorFlow 1.x and loads the training data with tf.data.Dataset:

Reference: https://www.jianshu.com/p/ceb9f9185b20
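
The core pattern of tf.data used below is: build a Dataset from file paths and labels, map a parse/augment function over it, then shuffle, repeat, and batch. A minimal sketch of just that skeleton (the file names here are placeholders, not real files):

import tensorflow as tf

paths = ["img_0.jpg", "img_1.jpg"]  # placeholder file names
labels = [0, 1]

def _parse(path, label):
    img = tf.image.decode_jpeg(tf.read_file(path), channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize_images(img, [224, 224])
    return img, label

ds = tf.data.Dataset.from_tensor_slices((paths, labels))
ds = ds.map(_parse).shuffle(buffer_size=4).repeat().batch(2)
images, batch_labels = ds.make_one_shot_iterator().get_next()

The full script below follows exactly this skeleton, plus a small CNN and a training loop.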

import cv2,os,csv
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
 
 
# Select which GPU is visible to TensorFlow
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Note: tf.device('/gpu:0') only pins ops when used as a context manager
# (with tf.device('/gpu:0'): ...); a bare call is a no-op.
 
def my_model(images, N_C):
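    """Small VGG-style CNN: three conv blocks (each ending in a 1x1
    bottleneck conv and a 2x2 max-pool), then batch norm, a 1000-unit
    dense layer, dropout, and an N_C-way logits layer."""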
    print("image.shape: ",images.shape)
    conv1_1 = tf.layers.conv2d(images,
                               32,  # output channels
                               (3, 3),  # kernel size
                               padding='same',  # alternative: 'valid'
                               activation=tf.nn.relu,
                               name='conv1_1')
    print("conv1_1.shape: ", conv1_1.shape)
    conv1_2 = tf.layers.conv2d(conv1_1,
                               32,
                               (3, 3),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv1_2')
    print("conv1_2.shape: ", conv1_2.shape)
    # 1x1 convolution: a cheap channel-mixing layer
    conv1_3 = tf.layers.conv2d(conv1_2,
                               32,
                               (1, 1),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv1_3')
    print("conv1_3.shape: ", conv1_3.shape)
    pooling1 = tf.layers.max_pooling2d(conv1_3,
                                       (2, 2),  # pool size
                                       (2, 2),  # stride
                                       name='pooling1')
    print("pooling1.shape: ", pooling1.shape)
    conv2_1 = tf.layers.conv2d(pooling1,
                               64,  # output channels
                               (3, 3),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv2_1')
    print("conv2_1.shape: ", conv2_1.shape)
    conv2_2 = tf.layers.conv2d(conv2_1,
                               64,
                               (3, 3),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv2_2')
    print("conv2_2.shape: ", conv2_2.shape)
    conv2_3 = tf.layers.conv2d(conv2_2,
                               32,  # 1x1 bottleneck back down to 32 channels
                               (1, 1),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv2_3')
    print("conv2_3.shape: ", conv2_3.shape)
    pooling2 = tf.layers.max_pooling2d(conv2_3,
                                       (2, 2),
                                       (2, 2),
                                       name='pooling2')
    print("pooling2.shape: ", pooling2.shape)
    conv3_1 = tf.layers.conv2d(pooling2,
                               128,  # output channels
                               (3, 3),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv3_1')
    print("conv3_1.shape: ", conv3_1.shape)
    conv3_2 = tf.layers.conv2d(conv3_1,
                               128,
                               (3, 3),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv3_2')
    print("conv3_2.shape: ", conv3_2.shape)
    conv3_3 = tf.layers.conv2d(conv3_2,
                               32,  # 1x1 bottleneck back down to 32 channels
                               (1, 1),
                               padding='same',
                               activation=tf.nn.relu,
                               name='conv3_3')
    print("conv3_3.shape: ", conv3_3.shape)
    pooling3 = tf.layers.max_pooling2d(conv3_3,
                                       (2, 2),
                                       (2, 2),
                                       name='pooling3')
    print("pooling3.shape: ", pooling3.shape)
    # training=True uses batch statistics; the moving-average update ops this
    # creates must run alongside the train op (see UPDATE_OPS in the main script)
    batch_norm = tf.layers.batch_normalization(pooling3, training=True)
 
    flatten = tf.layers.flatten(batch_norm)
    print("flatten: ", flatten.shape)
 
    dense = tf.layers.dense(flatten, 1000)
    print("dense: ", dense.shape)
    # tf.layers.dropout is a no-op unless training=True is passed;
    # this script only trains, so the flag is hard-coded here
    dropout = tf.layers.dropout(dense, training=True)
    logits = tf.layers.dense(dropout, N_C)
    print("logits: ", logits.shape)
    return logits
 
 
 
class ImageData:
    def __init__(self, data_root, batch_size, image_size, one_hot=False):
        self.data_root = data_root
        self.image_size = image_size  # e.g. (224, 224)
        self.one_hot = one_hot
        self.img_path = []  # list of image file paths
        self.labels = []    # list of corresponding labels

        self.read_file()  # reads straight from a folder; read_txt()/read_csv() below are drop-in alternatives
 
 
        self.castTensor()
        self.batch_size = batch_size
        self.data = tf.data.Dataset.from_tensor_slices((self.image_tensor, self.label_tensor))
        self.data = self.data.map(self.parse_function, num_parallel_calls=8)
 
        # Shuffle the data; a larger buffer_size means a more thorough shuffle.
        # Note: shuffle() returns a new dataset, so the result must be reassigned.
        self.data = self.data.shuffle(buffer_size=batch_size * 2)
        # self.data = self.data.batch(batch_size=self.batch_size)
        # self.data = self.data.repeat()
        self.data = self.data.repeat().batch(self.batch_size)
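        # A possible extension (not in the original): adding
        # self.data = self.data.prefetch(1) here lets the input pipeline
        # prepare the next batch while the previous one is training.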
 
    def read_file(self):
        cats = []
        label_cats = []
        dogs = []
        label_dogs = []
        for file in os.listdir(self.data_root):
            # Kaggle filenames look like cat.0.jpg / dog.0.jpg
            name = file.split(sep='.')
            if 'cat' in name[0]:
                cats.append(self.data_root + file)
                label_cats.append(0)
            if 'dog' in name[0]:
                dogs.append(self.data_root + file)
                label_dogs.append(1)
        assert len(cats)==len(label_cats)
        assert len(dogs)==len(label_dogs)
        image_list = np.hstack((cats, dogs))
        label_list = np.hstack((label_cats, label_dogs))
        print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))
        temp = np.array([image_list, label_list])
        temp = temp.transpose()
        # Shuffle the (path, label) pairs together
        np.random.shuffle(temp)
        # Column 0 holds the image paths, column 1 the labels
        self.img_path = list(temp[:, 0])
        label_list = list(temp[:, 1])
        self.labels = [int(i) for i in label_list]  # np.array above cast the labels to strings
 
    def read_csv(self):
        data_path = "/home/dzf/yz/dataset/classfiar/cat_dog/bird/train_set/"
        with open(self.data_root, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                assert int(row[0].split(".")[0]) == int(row[1])
 
                img_path = data_path + row[0]
                self.img_path.append(img_path)
                self.labels.append(int(row[1]))
 
        assert len(self.img_path) == len(self.labels)

    def read_txt(self):
        for line in open(self.data_root, "r"):
            # strip() matters here: each line in the txt file ends with a '\n'
            items = line.strip().split(" ")
            self.img_path.append(items[0])
            self.labels.append(int(items[1]))
 
        assert len(self.img_path) == len(self.labels)
        print("num of datas = ", len(self.img_path))
 
    def castTensor(self):
        self.image_tensor = tf.convert_to_tensor(self.img_path, dtype=tf.string)
        self.label_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int32)
        return self.image_tensor, self.label_tensor
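    # Note: tf.data.Dataset.from_tensor_slices also accepts plain Python
    # lists or NumPy arrays, so this explicit conversion step is optional.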
 
    def parse_function(self, filename, label_):
        # These prints fire once at graph-construction time, not once per image
        print("parse_function() filename===>", filename)
        print("parse_function() label_===>", label_)
        # one-hot encoding (unused here because the loss below is the sparse variant)
        # if self.one_hot:
        #     label_ = tf.one_hot(label_, 2)

        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_jpeg(img_string, channels=3)  # decode_jpeg returns RGB
        img = tf.image.convert_image_dtype(img_decoded, dtype=tf.float32)  # rescales to [0, 1]
        # method=0 bilinear; method=1 nearest neighbor; method=2 bicubic; method=3 area
        img = tf.image.resize_images(img, [self.image_size[0], self.image_size[1]], method=1)
        # img = img / 255.0  # unnecessary: convert_image_dtype already rescaled to [0, 1]
        img = self.augment_dataset(img)  # data augmentation
        return img, label_
 
    def augment_dataset(self, image):
        # img = tf.subtract(image, VGG_MEAN)
        # Randomly flip the image horizontally (left to right)
        img_flip = tf.image.random_flip_left_right(image)

        # Randomly adjust brightness within [-max_delta, max_delta).
        # The image is a float in [0, 1] at this point, so max_delta must be
        # small; a value like 63 is only appropriate for uint8 images.
        img_brightness = tf.image.random_brightness(img_flip, max_delta=0.25)

        # Randomly adjust contrast within [lower, upper]
        img_contrast = tf.image.random_contrast(img_brightness, lower=0.2, upper=1.8)

        # Randomly adjust hue within [-max_delta, max_delta]; max_delta must lie in [0, 0.5]
        img_random_hue = tf.image.random_hue(img_contrast, max_delta=0.5)

        # Randomly adjust saturation within [lower, upper]
        # (note: [0, 0.5] only ever desaturates; a range like [0.5, 1.5] is more usual)
        img_random_saturation = tf.image.random_saturation(img_random_hue, lower=0, upper=0.5)

        # Standardize the image to zero mean and unit variance
        float_image = tf.image.per_image_standardization(img_random_saturation)
        return float_image
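    # Note: these augmentations are intended for training data only; an
    # evaluation pipeline would typically keep just the resize and the
    # per-image standardization.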
 
 
 
if __name__ == '__main__':
    batch_size = 24
    LEARNING_RATE = 0.001
    TRAIN_STEP = 10000
    TEST_STEP = 5000
    logs_train_dir = './log/train_log/'  # created automatically
    model_train_dir = './log/model/'  # created automatically
    file_path = "./cat_dog/train/"  # path to your images (here, the Kaggle cats-vs-dogs training set)
    dataset = ImageData(data_root=file_path, batch_size=batch_size, image_size=(224, 224))
 
    iterator = dataset.data.make_one_shot_iterator()
    batch_images, batch_labels = iterator.get_next()
    print("batch_images.shape, batch_labels.shape")
    print(batch_images.shape, batch_labels.shape)
 
    train_logits = my_model(batch_images, 2)
    softmax_y = tf.nn.softmax(train_logits)  # per-class probabilities
    predict = tf.argmax(softmax_y, 1)
 
    loss = tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=train_logits)
    acc = tf.reduce_mean(tf.cast(tf.equal(predict, tf.cast(batch_labels, tf.int64)), tf.float32))
    # Batch norm's moving-average updates live in the UPDATE_OPS collection
    # and must be run together with the training op
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
 
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()  # merge all summary ops
    saver = tf.train.Saver(max_to_keep=3)
 
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        sess.run(init)
        # Coordinator/queue runners are a leftover from the old queue-based
        # input pipeline; tf.data does not need them, but they are harmless here
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
 
        # ckpt = tf.train.get_checkpoint_state(model_train_dir)
        # if ckpt and ckpt.model_checkpoint_path:
        #     print("checkpoint found, resuming training")
        #     saver.restore(sess, ckpt.model_checkpoint_path)
        # else:
        #     print("training from scratch")
        acc_list = []
 
        # Sanity check: pull one batch (note that this consumes it from the iterator)
        image, label = sess.run([batch_images, batch_labels])
        print("image.shape:", image.shape)
        for step in range(TRAIN_STEP):
            eval_ops = [loss, acc, train_op]
 
            eval_ops_results, predict_1, label_1, softmax_1 = sess.run([eval_ops,predict, batch_labels, softmax_y])
            loss_val, train_acc = eval_ops_results[0:2]
            acc_list.append(train_acc)
 
            if (step + 1) % 100 == 0:
                # Note: this separate sess.run draws a fresh batch just for the summary
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
 
                acc_mean = np.mean(acc_list)
                print('step:{0},loss:{1:.5},acc:{2:.5},acc_mean:{3:.5}'.format(
                    step + 1, loss_val, train_acc, acc_mean
                ))
                print("label  :",label_1)
                print("predict:",predict_1)
                # print("softmax:",softmax_1)
                print("\n")
            if step % 2000 == 0 or (step + 1) == TRAIN_STEP:
                checkpoint_path = os.path.join(model_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
 
        coord.request_stop()
        coord.join(threads)
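
A quick way to check the pipeline is to pull one batch and look at it; the script imports matplotlib (and cv2) without using them, and this is one natural use. A minimal sketch, separate from the training script above (my own addition, not part of the original):

import matplotlib.pyplot as plt
import tensorflow as tf

dataset = ImageData(data_root="./cat_dog/train/", batch_size=4, image_size=(224, 224))
imgs_t, labels_t = dataset.data.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    imgs, labels = sess.run([imgs_t, labels_t])
    for i in range(4):
        plt.subplot(1, 4, i + 1)
        # per_image_standardization made the values zero-mean, so rescale for display
        img = (imgs[i] - imgs[i].min()) / (imgs[i].max() - imgs[i].min())
        plt.imshow(img)
        plt.title('dog' if labels[i] == 1 else 'cat')
        plt.axis('off')
    plt.show()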
 
 

To be continued...
