Tensorflow自定义数据集实战

数据集

宝可梦数据集

步骤
  • load data 加载数据
    • images and labels:X = [1.png, 2.png, …],Y = [4, 9, …]
    • tf.data.Dataset.from_tensor_slices((X,Y))
    • .shuffle().map(preprocess).batch()
    • 数据增强与标准化
  • build model 建立模型
    • 定义网络结构
    • 定义前向传播
  • train-val-test 训练与测试
  • transfer learning 迁移学习
宝可梦数据集实战
数据集
# pokemon.py
import  os, glob
import  random, csv
import tensorflow as tf


def load_csv(root, filename, name2label):
    """Create (if missing) and read the CSV index of image paths and labels.

    If ``root/filename`` does not exist yet, every ``*.png``, ``*.jpg`` and
    ``*.jpeg`` file found in ``root/<class name>/`` for each key of
    ``name2label`` is collected, shuffled once, and written to the CSV as
    ``path,label`` rows. The CSV is then read back, so later calls reuse
    the same (already shuffled) order.

    Args:
        root: dataset root directory.
        filename: name of the CSV file, relative to ``root``.
        name2label: mapping from class (sub-directory) name to integer label.

    Returns:
        A tuple ``(images, labels)``: a list of image paths and the list of
        their integer labels, in CSV row order.
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        images = []
        for name in name2label:
            class_dir = os.path.join(root, name)
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(class_dir, pattern))

        random.shuffle(images)
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The class name is the directory that directly contains the
                # image; dirname/basename copes with mixed path separators.
                name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, name2label[name]])
            print('written into csv file:', filename)

    # Read the index back from the CSV file.
    images, labels = [], []
    # newline='' lets the csv module handle line endings itself.
    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. a trailing newline added by an editor)
                # carry no record; skip them instead of failing to unpack.
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))

    return images, labels


def load_pokemon(root, mode='train'):
    """Return the image paths, labels and class table for one dataset split.

    Every sub-directory of ``root`` is treated as one class; classes are
    numbered 0, 1, 2, ... in sorted name order. The full index comes from
    ``load_csv(root, 'images.csv', ...)`` and is cut into three consecutive
    windows: the first 60% for ``mode='train'``, the next 20% for
    ``mode='val'``, and the remaining 20% for any other ``mode`` value.

    Returns:
        ``(images, labels, name2label)`` for the requested split.
    """
    # Build the class-name -> integer-label table from the sub-directories.
    class_names = [entry for entry in sorted(os.listdir(os.path.join(root)))
                   if os.path.isdir(os.path.join(root, entry))]
    name2label = {entry: index for index, entry in enumerate(class_names)}

    # Full index: [file1, file2, ...], [3, 1, ...]
    images, labels = load_csv(root, 'images.csv', name2label)

    total = len(images)
    train_end = int(0.6 * total)
    val_end = int(0.8 * total)

    if mode == 'train':      # first 60%
        window = slice(None, train_end)
    elif mode == 'val':      # 60% -> 80%
        window = slice(train_end, val_end)
    else:                    # 80% -> 100%
        window = slice(val_end, None)

    return images[window], labels[window], name2label

# Per-channel mean and standard deviation described by the original note as
# the statistics of all ImageNet images.
# They must be live module-level constants: `normalize` and `denormalize`
# use them as default argument values, which are evaluated when the
# functions are defined, so leaving them commented out raises NameError.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    """Standardize ``x`` by subtracting ``mean`` and dividing by ``std``.

    The default ``mean`` and ``std`` each hold 3 values.
    Assumes ``x`` has 3 channels on its last axis (e.g. [224, 224, 3]) so
    the statistics broadcast per channel -- TODO confirm against callers.
    The result is centred and rescaled; it is not confined to [0, 1].
    """
    x = (x - mean) / std
    return x


def denormalize(x, mean=img_mean, std=img_std):
    """Invert ``normalize`` (``x * std + mean``), e.g. before visualizing."""
    x = x * std + mean
    return x

def preprocess(x,y):
    """Turn an (image path, integer label) pair into a (tensor, tensor) pair.

    The file at path ``x`` is read and decoded with 3 channels, resized to
    244x244, randomly cropped to 224x224x3, scaled by 1/255 and passed
    through ``normalize``. ``y`` is converted to a tensor unchanged.

    Random up/down and left/right flips were considered as extra
    augmentation in the original but are not applied.
    To skip cropping, resize straight to 224x224 instead.
    """
    raw_bytes = tf.io.read_file(x)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Enlarge slightly, then take a random 224x224 crop as augmentation.
    enlarged = tf.image.resize(decoded, [244, 244])
    cropped = tf.image.random_crop(enlarged, [224, 224, 3])

    # [0, 255] -> [0, 1], then standardize with the dataset statistics.
    scaled = tf.cast(cropped, dtype=tf.float32) / 255.
    return normalize(scaled), tf.convert_to_tensor(y)


def main():
    """Smoke-test the loader by printing the size of every dataset split.

    The guard below called ``main()`` without it being defined, so running
    this script directly raised NameError. The 'pokemon' root directory is
    the one the training scripts pass to ``load_pokemon``.
    """
    for mode in ('train', 'val', 'test'):
        images, labels, name2label = load_pokemon('pokemon', mode=mode)
        print(mode, 'images:', len(images), 'labels:', len(labels),
              'classes:', name2label)


if __name__ == '__main__':
    main()
模型结构
# resnet.py
import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras
from    tensorflow.keras import layers

# Seed TensorFlow's and NumPy's random number generators with a fixed value.
tf.random.set_seed(22)
np.random.seed(22)
# Presumably meant to reduce TensorFlow's native log output.
# NOTE(review): this is assigned after `import tensorflow` has already run --
# confirm it still takes effect, or move it above that import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Stop immediately unless the installed TensorFlow reports a 2.x version.
assert tf.__version__.startswith('2.')

class ResnetBlock(keras.Model):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Each convolution is followed by ReLU and then batch normalization. When
    ``strides != 1`` the shortcut goes through its own 1x1 strided
    convolution, ReLU and batch normalization before being added; otherwise
    the input is added as-is. A final ReLU is applied to the sum.
    """

    def __init__(self, channels, strides=1):
        super().__init__()

        self.channels = channels
        self.strides = strides

        # Main path. The explicit padding list adds one unit on both sides of
        # axes 1 and 2 (presumably height and width of an NHWC input --
        # TODO confirm) and nothing on axes 0 and 3.
        self.conv1 = layers.Conv2D(
            channels, 3, strides=strides,
            padding=[[0, 0], [1, 1], [1, 1], [0, 0]])
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(
            channels, 3, strides=1,
            padding=[[0, 0], [1, 1], [1, 1], [0, 0]])
        self.bn2 = layers.BatchNormalization()

        # Shortcut path, only needed when the main path is strided.
        if strides != 1:
            self.down_conv = layers.Conv2D(
                channels, 1, strides=strides, padding='valid')
            self.down_bn = layers.BatchNormalization()

    def call(self, inputs, training=None):
        """Run the block on ``inputs``; ``training`` is passed to the BN layers."""
        out = self.bn1(tf.nn.relu(self.conv1(inputs)), training=training)
        out = self.bn2(tf.nn.relu(self.conv2(out)), training=training)

        # Residual connection.
        if self.strides == 1:
            shortcut = inputs
        else:
            shortcut = self.down_bn(tf.nn.relu(self.down_conv(inputs)),
                                    training=training)

        return tf.nn.relu(out + shortcut)


class ResNet(keras.Model):
    """Small ResNet-style classifier built from eight ``ResnetBlock`` layers.

    Pipeline: 3x3 stride-3 stem convolution + ReLU, eight residual blocks,
    batch normalization, global pooling, and a dense layer producing
    ``num_classes`` raw scores (no softmax is applied).

    Args:
        num_classes: number of output units of the final dense layer.
        initial_filters: filter count of the stem; the blocks use 2x, 4x,
            8x and 16x this value.
        **kwargs: forwarded to ``keras.Model``.
    """

    def __init__(self, num_classes, initial_filters=16, **kwargs):
        super().__init__(**kwargs)

        self.stem = layers.Conv2D(initial_filters, 3, strides=3, padding='valid')

        # (filter multiplier, stride) of each residual block, in order.
        # A layers.Dropout(rate=0.5) could be inserted after the second
        # block to fight over-fitting (left disabled in the original).
        block_plan = [
            (2, 3), (2, 1),
            (4, 3), (4, 1),
            (8, 2), (8, 1),
            (16, 2), (16, 1),
        ]
        self.blocks = keras.models.Sequential([
            ResnetBlock(initial_filters * multiplier, strides=stride)
            for multiplier, stride in block_plan
        ])

        self.final_bn = layers.BatchNormalization()
        # NOTE: despite the attribute name, this is a global *max* pooling layer.
        self.avg_pool = layers.GlobalMaxPool2D()
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return the class scores for a batch of images."""
        features = tf.nn.relu(self.stem(inputs,training=training))
        features = self.blocks(features, training=training)
        features = self.final_bn(features, training=training)
        pooled = self.avg_pool(features)
        return self.fc(pooled)


def main():
    """Smoke-test the model: build a 5-class ResNet and print its summary.

    The guard below called ``main()`` without it being defined, so running
    this script directly raised NameError.
    """
    model = ResNet(5)
    model.build(input_shape=(4, 224, 224, 3))
    model.summary()


if __name__ == '__main__':
    main()
训练与测试
# train.py
import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras
from    tensorflow.keras import layers,optimizers,losses
from    tensorflow.keras.callbacks import EarlyStopping

from    pokemon import load_pokemon, normalize, denormalize
from    resnet import ResNet

# Seed TensorFlow's and NumPy's random number generators with a fixed value.
tf.random.set_seed(22)
np.random.seed(22)
# Presumably meant to reduce TensorFlow's native log output.
# NOTE(review): this is assigned after `import tensorflow` has already run --
# confirm it still takes effect, or move it above that import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Stop immediately unless the installed TensorFlow reports a 2.x version.
assert tf.__version__.startswith('2.')

def preprocess(x,y):
    """Map an (image path, integer label) pair to a (tensor, tensor) pair.

    Steps: read the file at ``x``, decode it with 3 channels, resize to
    244x244, take a random 224x224x3 crop, scale by 1/255 and apply
    ``normalize``. ``y`` is only converted to a tensor.

    Random up/down and left/right flips were considered as extra
    augmentation in the original but are not applied.
    To skip cropping, resize straight to 224x224 instead.
    """
    file_content = tf.io.read_file(x)
    picture = tf.image.decode_jpeg(file_content, channels=3)

    # Enlarge a little so the random crop below has room to move.
    picture = tf.image.resize(picture, [244, 244])
    picture = tf.image.random_crop(picture, [224, 224, 3])

    # [0, 255] -> [0, 1], then standardize.
    picture = normalize(tf.cast(picture, dtype=tf.float32) / 255.)
    label = tf.convert_to_tensor(y)
    return picture, label

batchsz = 256

# Training split: shuffled, preprocessed, batched.
images, labels, table = load_pokemon('pokemon',mode='train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
db_train = db_train.shuffle(1000)
db_train = db_train.map(preprocess)
db_train = db_train.batch(batchsz)

# Validation split: preprocessed and batched, no shuffling.
images2, labels2, table = load_pokemon('pokemon',mode='val')
db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
db_val = db_val.map(preprocess)
db_val = db_val.batch(batchsz)

# Test split: preprocessed and batched, no shuffling.
images3, labels3, table = load_pokemon('pokemon',mode='test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess)
db_test = db_test.batch(batchsz)

'''
当数据集过小时,可以尝试这个网络
resnet = keras.Sequential([
    layers.Conv2D(16,5,3),
    layers.MaxPool2D(3,3),
    layers.ReLU(),
    layers.Conv2D(64,5,3),
    layers.MaxPool2D(2,2),
    layers.ReLU(),
    layers.Flatten(),
    layers.Dense(64),
    layers.ReLU(),
    layers.Dense(5)
])
'''

# Build the 5-class ResNet and print its layer summary.
resnet = ResNet(5)
resnet.build(input_shape=(4, 224, 224, 3))
resnet.summary()

# Stop training once validation accuracy has failed to improve by at least
# 0.001 for 5 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=5
)

# The labels produced by `preprocess` are plain integer class ids (they are
# never one-hot encoded), so the sparse variant of the cross-entropy loss is
# required; CategoricalCrossentropy expects one-hot targets.
# `learning_rate` replaces the deprecated `lr` keyword.
resnet.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
               loss=losses.SparseCategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])
resnet.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
           callbacks=[early_stopping])
resnet.evaluate(db_test)
迁移学习
# transfer.py
import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras
from    tensorflow.keras import layers,optimizers,losses
from    tensorflow.keras.callbacks import EarlyStopping

from pokemon import  load_pokemon,normalize

# Seed TensorFlow's and NumPy's random number generators with a fixed value.
tf.random.set_seed(22)
np.random.seed(22)
# Presumably meant to reduce TensorFlow's native log output.
# NOTE(review): this is assigned after `import tensorflow` has already run --
# confirm it still takes effect, or move it above that import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Stop immediately unless the installed TensorFlow reports a 2.x version.
assert tf.__version__.startswith('2.')

def preprocess(x,y):
    """Convert an (image path, integer label) pair into model-ready tensors.

    The image at path ``x`` is read, decoded with 3 channels, resized to
    244x244, randomly cropped to 224x224x3, scaled by 1/255 and run
    through ``normalize``. The label ``y`` is converted to a tensor as-is.

    Random up/down and left/right flips were considered as extra
    augmentation in the original but are not applied.
    To skip cropping, resize straight to 224x224 instead.
    """
    encoded = tf.io.read_file(x)
    pixels = tf.image.decode_jpeg(encoded, channels=3)

    # Resize larger than the target, then crop randomly (augmentation).
    resized = tf.image.resize(pixels, [244, 244])
    patch = tf.image.random_crop(resized, [224, 224, 3])

    # [0, 255] -> [0, 1], followed by standardization.
    unit_range = tf.cast(patch, dtype=tf.float32) / 255.
    standardized = normalize(unit_range)
    return standardized, tf.convert_to_tensor(y)

batchsz = 256

# Training split: shuffled, preprocessed, batched.
images, labels, table = load_pokemon('pokemon',mode='train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
db_train = db_train.shuffle(1000)
db_train = db_train.map(preprocess)
db_train = db_train.batch(batchsz)

# Validation split: preprocessed and batched, no shuffling.
images2, labels2, table = load_pokemon('pokemon',mode='val')
db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
db_val = db_val.map(preprocess)
db_val = db_val.batch(batchsz)

# Test split: preprocessed and batched, no shuffling.
images3, labels3, table = load_pokemon('pokemon',mode='test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess)
db_test = db_test.batch(batchsz)

# Fine-tune on top of VGG19.
# Load VGG19 with its ImageNet-trained weights; include_top=False drops the
# original output layers and pooling='max' selects max pooling at the end.
vgg = keras.applications.VGG19(weights='imagenet', include_top=False, pooling='max')
vgg.trainable = False     # freeze the pretrained part of the network

net = keras.Sequential([
    vgg,
    layers.Dense(5)  # the only layer that gets trained
])
net.build(input_shape=(4,224,224,3))
net.summary()

# Stop training once validation accuracy has failed to improve by at least
# 0.001 for 5 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=5
)

# Train and evaluate `net`, the model defined in this script; the name
# `resnet` used here previously is not defined in transfer.py.
# The labels produced by `preprocess` are plain integer class ids (never
# one-hot encoded), so the sparse cross-entropy loss is required.
# `learning_rate` replaces the deprecated `lr` keyword.
net.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
            loss=losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'])
net.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
        callbacks=[early_stopping])
net.evaluate(db_test)
  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值