Deep learning: building your own CNN and training it on CIFAR-10

If you don't have a GPU, you can use Google Colab, which offers free GPU compute. It is simple to use, and tutorials are easy to find online.
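A quick way to verify that a GPU is actually available (in Colab: Runtime -> Change runtime type -> GPU) is the small check below; it is not part of the training scripts that follow:

import tensorflow as tf

# prints the GPUs TensorFlow can see; an empty list means you are on CPU
print(tf.config.list_physical_devices('GPU'))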

Residual network on CIFAR-10:

import tensorflow as tf
import numpy as np
import pickle as p
from tqdm import tqdm
import os
import cv2
import time
from tensorflow.keras import models, optimizers, regularizers
from tensorflow.keras.layers import Conv2D, AveragePooling2D, BatchNormalization, Flatten, Dense, Input, add, Activation
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# network config
stack_n = 18  # depth = stack_n * 6 + 2 = 110 layers (ResNet-110)
weight_decay = 1e-4

# training config
batch_size = 128
train_num = 50000
iterations_per_epoch = int(train_num / batch_size)
learning_rate = [0.1, 0.01, 0.001]
boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch]
epoch_num = 200

# test config
test_batch_size = 200
test_num = 10000
test_iterations = int(test_num / test_batch_size)

def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='iso-8859-1')
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32)
        Y = np.array(Y)
        return X, Y


def load_CIFAR(Foldername):
    train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32)
    train_label = np.zeros([50000, 10], dtype=np.float32)
    test_data = np.zeros([10000, 32, 32, 3], dtype=np.float32)
    test_label = np.zeros([10000, 10], dtype=np.float32)

    for sample in range(5):
        X, Y = load_CIFAR_batch(Foldername + "/data_batch_" + str(sample + 1))

        for i in range(3):
            train_data[10000 * sample:10000 * (sample + 1), :, :, i] = X[:, i, :, :]
        for i in range(10000):
            train_label[i + 10000 * sample][Y[i]] = 1

    X, Y = load_CIFAR_batch(Foldername + "/test_batch")
    for i in range(3):
        test_data[:, :, :, i] = X[:, i, :, :]
    for i in range(10000):
        test_label[i][Y[i]] = 1

    return train_data, train_label, test_data, test_label

def color_normalize(train_images, test_images):
    # cast to float32 first: keras' cifar10.load_data() returns uint8 arrays,
    # and in-place normalization on uint8 would truncate and overflow
    train_images = train_images.astype(np.float32)
    test_images = test_images.astype(np.float32)
    mean = [np.mean(train_images[:, :, :, i]) for i in range(3)]  # [125.307, 122.95, 113.865]
    std = [np.std(train_images[:, :, :, i]) for i in range(3)]  # [62.9932, 62.0887, 66.7048]
    for i in range(3):
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean[i]) / std[i]
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean[i]) / std[i]
    return train_images, test_images

def images_augment(images):
    output = []
    for img in images:
        img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        x = np.random.randint(0, 9)  # padded image is 40x40, so valid crop offsets are 0..8
        y = np.random.randint(0, 9)
        if np.random.randint(0, 2):
            img = cv2.flip(img, 1)
        output.append(img[x: x+32, y:y+32, :])
    return np.ascontiguousarray(output, dtype=np.float32)


def residual_block(inputs, channels, strides=(1, 1)):
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(inputs)
    net = Activation('relu')(net)

    if strides == (1, 1):
        shortcut = inputs
    else:
        shortcut = Conv2D(channels, (1, 1), strides=strides)(net)

    net = Conv2D(channels, (3, 3), padding='same', strides=strides)(net)
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = Conv2D(channels, (3, 3), padding='same')(net)

    net = add([net, shortcut])
    return net

def ResNet(inputs):
    net = Conv2D(16, (3, 3), padding='same')(inputs)

    for i in range(stack_n):
        net = residual_block(net, 16)

    net = residual_block(net, 32, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 32)

    net = residual_block(net, 64, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 64)

    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = AveragePooling2D(8, 8)(net)
    net = Flatten()(net)
    net = Dense(10, activation='softmax')(net)
    return net

def cross_entropy(y_true, y_pred):
    cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return tf.reduce_mean(cross_entropy)

def l2_loss(model, weights=weight_decay):
    variable_list = []
    for v in model.trainable_variables:
        if 'kernel' in v.name:
            variable_list.append(tf.nn.l2_loss(v))
    return tf.add_n(variable_list) * weights

def accuracy(y_true, y_pred):
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy

@tf.function
def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        ce = cross_entropy(y, prediction)
        l2 = l2_loss(model)
        loss = ce + l2
    # compute gradients outside the tape context, then apply them
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return ce, prediction

@tf.function
def test_step(model, x, y):
    prediction = model(x, training=False)
    ce = cross_entropy(y, prediction)
    return ce, prediction

def train(model, optimizer, images, labels):
    sum_loss = 0
    sum_accuracy = 0

    # shuffle images and labels in unison by reusing the same random seed
    seed = np.random.randint(0, 65536)
    np.random.seed(seed)
    np.random.shuffle(images)
    np.random.seed(seed)
    np.random.shuffle(labels)

    for i in tqdm(range(iterations_per_epoch)):
        x = images[i * batch_size: (i + 1) * batch_size, :, :, :]
        y = labels[i * batch_size: (i + 1) * batch_size, :]
        x = images_augment(x)

        loss, prediction = train_step(model, optimizer, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)

    print('ce_loss:%f, l2_loss:%f, accuracy:%f' %
          (sum_loss / iterations_per_epoch, l2_loss(model), sum_accuracy / iterations_per_epoch))

def test(model, images, labels):
    sum_loss = 0
    sum_accuracy = 0

    for i in tqdm(range(test_iterations)):
        x = images[i * test_batch_size: (i + 1) * test_batch_size, :, :, :]
        y = labels[i * test_batch_size: (i + 1) * test_batch_size, :]

        loss, prediction = test_step(model, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)

    print('test, loss:%f, accuracy:%f' %
          (sum_loss / test_iterations, sum_accuracy / test_iterations))


if __name__ == '__main__':
    # gpu config (skip gracefully on machines without a GPU)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    # load data
    # (train_images, train_labels, test_images, test_labels) = load_CIFAR('/home/user/Documents/dataset/Cifar-10')
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train_labels = tf.keras.utils.to_categorical(train_labels, 10)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10)

    train_images, test_images = color_normalize(train_images, test_images)

    # get model
    img_input = Input(shape=(32, 32, 3))
    output = ResNet(img_input)
    model = models.Model(img_input, output)

    # show
    model.summary()

    # train
    learning_rate_schedules = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate)
    optimizer = optimizers.SGD(learning_rate=learning_rate_schedules, momentum=0.9, nesterov=True)

    for epoch in range(epoch_num):
        print('epoch %d' % epoch)
        train(model, optimizer, train_images, train_labels)
        test(model, test_images, test_labels)


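After training finishes you can run a quick prediction on a single test image. A minimal sketch, assuming `model` and the normalized `test_images` from the script above are still in scope:

import numpy as np

# class probabilities for the first test image; argmax is the predicted label
probs = model(test_images[:1], training=False).numpy()[0]
print('predicted class:', probs.argmax(), 'confidence: %.3f' % probs.max())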
The model above uses a residual network (ResNet). Each residual block learns a residual mapping F(x) and adds it back to the block's input, so a block computes y = x + F(x); when a block downsamples, a 1x1 convolution projects the shortcut onto the new shape. With stack_n = 18 the depth is 6 * 18 + 2 = 110 layers (ResNet-110). Here is the relevant code again:

def residual_block(inputs, channels, strides=(1, 1)):
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(inputs)
    net = Activation('relu')(net)

    if strides == (1, 1):
        shortcut = inputs
    else:
        shortcut = Conv2D(channels, (1, 1), strides=strides)(net)

    net = Conv2D(channels, (3, 3), padding='same', strides=strides)(net)
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = Conv2D(channels, (3, 3), padding='same')(net)

    net = add([net, shortcut])
    return net

def ResNet(inputs):
    net = Conv2D(16, (3, 3), padding='same')(inputs)

    for i in range(stack_n):
        net = residual_block(net, 16)

    net = residual_block(net, 32, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 32)

    net = residual_block(net, 64, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 64)

    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = AveragePooling2D(8, 8)(net)
    net = Flatten()(net)
    net = Dense(10, activation='softmax')(net)
    return net
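As a quick sanity check of the shapes (a small sketch, assuming the residual_block above is in scope; Keras layers can be called eagerly on plain tensors):

import tensorflow as tf

x = tf.random.normal((2, 32, 32, 16))       # fake batch of two 32x32x16 feature maps
y = residual_block(x, 16)                   # identity shortcut: shape preserved
print(y.shape)                              # (2, 32, 32, 16)
z = residual_block(x, 32, strides=(2, 2))   # projection shortcut: downsampled, wider
print(z.shape)                              # (2, 16, 16, 32)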

You can also build a model of your own, for example:

import tensorflow as tf
import numpy as np
import pickle as p
import os
from tensorflow.keras import models, optimizers, regularizers
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

weight_decay = 5e-4
batch_size = 128
learning_rate = 1e-2
dropout_rate = 0.5
epoch_num = 50

def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='iso-8859-1')
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32)
        Y = np.array(Y)
        return X, Y


def load_CIFAR(Foldername):
    train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32)
    train_label = np.zeros([50000, 10], dtype=np.float32)
    test_data = np.zeros([10000, 32, 32, 3], dtype=np.float32)
    test_label = np.zeros([10000, 10], dtype=np.float32)

    for sample in range(5):
        X, Y = load_CIFAR_batch(Foldername + "/data_batch_" + str(sample + 1))

        for i in range(3):
            train_data[10000 * sample:10000 * (sample + 1), :, :, i] = X[:, i, :, :]
        for i in range(10000):
            train_label[i + 10000 * sample][Y[i]] = 1

    X, Y = load_CIFAR_batch(Foldername + "/test_batch")
    for i in range(3):
        test_data[:, :, :, i] = X[:, i, :, :]
    for i in range(10000):
        test_label[i][Y[i]] = 1

    return train_data, train_label, test_data, test_label

def VGG16():
    model = models.Sequential()
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3), kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay)))

    model.add(Flatten())  # 2*2*512
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))

    return model


def scheduler(epoch):
    if epoch < epoch_num * 0.4:
        return learning_rate
    if epoch < epoch_num * 0.8:
        return learning_rate * 0.1
    return learning_rate * 0.01


if __name__ == '__main__':
    # gpu config (skip gracefully on machines without a GPU)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    # load data
    # (train_images, train_labels, test_images, test_labels) = load_CIFAR('/home/user/Documents/dataset/Cifar-10')
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train_labels = tf.keras.utils.to_categorical(train_labels, 10)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10)

    # get model
    model = VGG16()

    # show
    model.summary()

    # train
    sgd = optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)  # use learning_rate=; the old lr= argument is deprecated
    change_lr = LearningRateScheduler(scheduler)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.fit(train_images, train_labels,
              batch_size=batch_size,
              epochs=epoch_num,
              callbacks=[change_lr],
              validation_data=(test_images, test_labels))
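With epoch_num = 50 the scheduler above gives a simple step schedule. A quick check, assuming the scheduler and constants from the script above:

for e in (0, 19, 20, 39, 40, 49):
    print(e, scheduler(e))  # 0.01 for epochs 0-19, 0.001 for 20-39, 0.0001 for 40-49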

A hand-built VGG16 variant (functional API, with global average pooling in place of the huge fully connected head):

# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 11:18:48 2020

@author: Jimmy_ouyang
"""


import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers,datasets,losses,optimizers,Input,models,regularizers
from tqdm import tqdm
import cv2

epochs = 10
batchs = 64
weight_decay = 5e-4
train_num = 50000

test_batch_size = 64
test_num = 10000
learning_rate = 1e-2


def process(x, y):
    # scale pixels to [0, 1] and one-hot encode the labels
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.one_hot(y, depth=10)
    y = tf.reshape(y, (-1, 10))
    return x, y

def color_normalize(train_images, test_images):
    # cast to float32 first: keras' cifar10.load_data() returns uint8 arrays,
    # and in-place normalization on uint8 would truncate and overflow
    train_images = train_images.astype(np.float32)
    test_images = test_images.astype(np.float32)
    mean = [np.mean(train_images[:, :, :, i]) for i in range(3)]  # [125.307, 122.95, 113.865]
    std = [np.std(train_images[:, :, :, i]) for i in range(3)]  # [62.9932, 62.0887, 66.7048]
    for i in range(3):
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean[i]) / std[i]
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean[i]) / std[i]
    return train_images, test_images

def pic_agument(images):
	output = []
	for img in images:
		img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT, value=[0, 0, 0])
		x = np.random.randint(0, 9)  # padded image is 40x40, so valid crop offsets are 0..8
		y = np.random.randint(0, 9)
		if np.random.randint(0, 2):
			img = cv2.flip(img, 1)
		output.append(img[x: x+32, y:y+32, :])
	return np.ascontiguousarray(output, dtype=np.float32)
	

def VGG16(x):
    cv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3), kernel_regularizer=regularizers.l2(weight_decay))(x)
    cv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv1)
    pool1 = layers.MaxPooling2D((2, 2))(cv2)

    cv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(pool1)
    cv4 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv3)
    pool2 = layers.MaxPooling2D((2, 2))(cv4)

    cv5 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(pool2)
    cv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv5)
    cv7 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv6)
    pool3 = layers.MaxPooling2D((2, 2))(cv7)

    cv8 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(pool3)
    cv9 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv8)
    cv10 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv9)
    pool4 = layers.MaxPooling2D((2, 2))(cv10)

    cv11= layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(pool4)
    cv12 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv11)
    cv13 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(cv12)

    fl1 = layers.GlobalAveragePooling2D()(cv13)  # global average pooling over 2x2x512 -> 512 features
    fl2 = layers.Dense(1024, activation='relu')(fl1)
    fl3 = layers.Dropout(0.7)(fl2)
    fl4 = layers.Dense(256, activation='relu')(fl3)
    fl5 = layers.Dropout(0.7)(fl4)
    out = layers.Dense(10, activation='softmax')(fl5)

    return out

	
def cross_entropy(y_true, y_pred):
	cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
	return tf.reduce_mean(cross_entropy)


def accuracy(y_true, y_pred):
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy

@tf.function
def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        ce = cross_entropy(y, prediction)

        loss = ce 
        gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return ce, prediction



		
def train(model, optimizer, train_data, epoch):
    sum_loss = 0
    sum_accuracy = 0

    for i, (x, y) in enumerate(train_data):
        with tf.GradientTape() as tape:
            prediction = model(x, training=True)
            cross_entropy = tf.keras.losses.categorical_crossentropy(y, prediction)
            loss = tf.reduce_mean(cross_entropy)
            acc = accuracy(y, prediction)
        gradients = tape.gradient(loss, model.trainable_variables)
        sum_loss += loss
        sum_accuracy += acc
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # i is the index of the last batch, so the batch count is i + 1
    print('epoch:%d, train_ce_loss:%f, accuracy:%f' %
          (epoch, sum_loss / (i + 1), sum_accuracy / (i + 1)))

def test(model, test_data, epoch):
    sum_loss = 0
    sum_accuracy = 0

    for i, (x, y) in enumerate(test_data):
        out = model(x, training=False)
        loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y, out))
        sum_accuracy += accuracy(y, out)
        sum_loss += loss

    print('epoch:%d, test_ce_loss:%f, accuracy:%f' %
          (epoch, sum_loss / (i + 1), sum_accuracy / (i + 1)))
		
if __name__ == "__main__":
	
	# gpu config (skip gracefully on machines without a GPU)
	physical_devices = tf.config.experimental.list_physical_devices('GPU')
	if physical_devices:
		tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

	(x_train,y_train),(x_test,y_test) = datasets.cifar10.load_data()
	x_train = pic_agument(x_train)
	#x_train , x_test = color_normalize(x_train,x_test)
	
	train_data = tf.data.Dataset.from_tensor_slices((x_train,y_train)).shuffle(len(x_train)).batch(batchs).map(process)
	test_data = tf.data.Dataset.from_tensor_slices((x_test,y_test)).batch(batchs).map(process)
	
	img_input = Input(shape=(32,32,3))
	output = VGG16(img_input)
	model = models.Model(img_input, output)
	#model = VGG16()
	model.summary()

	#learning_rate = 0.01 

	optimizer = tf.keras.optimizers.Adam(0.0001)  # the learning rate must be set small, otherwise the gradients will not update
	#optimizer = optimizers.SGD(lr=learning_rate, momentum=0.9, nesterov=True)
	for epoch in range(epochs) :
		train(model ,optimizer,train_data,epoch)
		test(model,test_data,epoch)
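
Note that because .map(process) runs after .batch(...), process receives whole batches; that is why the one-hot labels are reshaped to (-1, 10). You can confirm the element shapes with a quick peek (a small check, assuming train_data from above):

for x, y in train_data.take(1):
    print(x.shape, y.shape)  # (64, 32, 32, 3) and (64, 10)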
		

A hand-built residual net; here we are after both speed and accuracy:

# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 11:18:48 2020

@author: Jimmy_ouyang
"""


import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers,datasets,losses,optimizers,Input,models,regularizers
from tqdm import tqdm
import cv2
import time

stack_n = 18  # depth = stack_n * 6 + 2 = 110 layers (ResNet-110)
weight_decay = 1e-4

# training config
batch_size = 128
train_num = 50000
iterations_per_epoch = int(train_num / batch_size)
learning_rate = [0.001, 0.00001, 0.000001]
boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch]
epoch_num = 5

def process(x, y):
    # scale pixels to [0, 1] and one-hot encode the labels
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.one_hot(y, depth=10)
    y = tf.reshape(y, (-1, 10))
    return x, y

def color_normalize(train_images, test_images):
    # cast to float32 first: keras' cifar10.load_data() returns uint8 arrays,
    # and in-place normalization on uint8 would truncate and overflow
    train_images = train_images.astype(np.float32)
    test_images = test_images.astype(np.float32)
    mean = [np.mean(train_images[:, :, :, i]) for i in range(3)]  # [125.307, 122.95, 113.865]
    std = [np.std(train_images[:, :, :, i]) for i in range(3)]  # [62.9932, 62.0887, 66.7048]
    for i in range(3):
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean[i]) / std[i]
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean[i]) / std[i]
    return train_images, test_images

def pic_agument(images):
	output = []
	for img in images:
		img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT, value=[0, 0, 0])
		x = np.random.randint(0, 9)  # padded image is 40x40, so valid crop offsets are 0..8
		y = np.random.randint(0, 9)
		if np.random.randint(0, 2):
			img = cv2.flip(img, 1)
		output.append(img[x: x+32, y:y+32, :])
	return np.ascontiguousarray(output, dtype=np.float32)
	


def residual_step(layer,channels,strides=(1,1)):
	layer = layers.BatchNormalization(momentum = 0.9 ,epsilon = 1e-5)(layer)
	layer = tf.nn.relu(layer)

	if strides ==(1,1):
		short =layer
	else :
		short = layers.Conv2D(channels,(1,1),strides = strides)(layer)
	
	layer = layers.Conv2D(channels,(3,3),padding = 'same',strides = strides)(layer)
	layer = layers.BatchNormalization(momentum = 0.9 ,epsilon = 1e-5)(layer)
	layer = tf.nn.relu(layer)
	layer = layers.Conv2D(channels,(3,3),padding = 'same')(layer)
	
	layer = layers.add([short,layer])
	
	return layer
	
	
def residual(x):
	
	net = layers.Conv2D(16,(3,3),padding='same')(x)
 
	for i in range(stack_n):
		net = residual_step(net,16)
		
	
	net = residual_step(net,32,strides = (2,2))
	
	for i in range(stack_n-1):
		net = residual_step(net,32)
	
	net = residual_step(net,64,strides = (2,2))
	
	for i in range(stack_n-1):
		net = residual_step(net,64)
	
	
	net = layers.BatchNormalization(momentum = 0.9 ,epsilon = 1e-5)(net)
	net = tf.nn.relu(net)
	net = layers.AveragePooling2D(8,8)(net)
	net = layers.Flatten()(net)
	net = layers.Dense(10,activation = 'softmax')(net)
	
	return net

	
def cross_entropy(y_true, y_pred):
	cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
	return tf.reduce_mean(cross_entropy)


def accuracy(y_true, y_pred):
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy

def l2_loss(model, weights=weight_decay):
    variable_list = []
    for v in model.trainable_variables:
        if 'kernel' in v.name:
            variable_list.append(tf.nn.l2_loss(v))
    return tf.add_n(variable_list) * weights

@tf.function
def train_step(model, x, y, optimizer):
    # note: the parameter was misnamed 'optimizator' and the body silently
    # fell back to the global 'optimizer'; fixed so the argument is used
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        cross_entropy = tf.keras.losses.categorical_crossentropy(y, prediction)
        ce = tf.reduce_mean(cross_entropy)
        l2 = l2_loss(model)
        loss = ce + l2
        acc = accuracy(y, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss, acc
  		
def train(model, optimizer, train_data, epoch):
    sum_loss = 0
    sum_accuracy = 0

    for i, (x, y) in enumerate(train_data):
        loss, acc = train_step(model, x, y, optimizer)
        sum_loss += loss
        sum_accuracy += acc

    # i is the index of the last batch, so the batch count is i + 1
    print('epoch:%d, train_ce_loss:%f, accuracy:%f' %
          (epoch, sum_loss / (i + 1), sum_accuracy / (i + 1)))
 			
@tf.function
def test_step(model, x, y):
    prediction = model(x, training=False)
    ce = cross_entropy(y, prediction)
    return ce, prediction

def test(model, test_data, epoch):
    sum_loss = 0
    sum_accuracy = 0

    for i, (x, y) in enumerate(test_data):
        loss, prediction = test_step(model, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)

    print('epoch:%d, test_ce_loss:%f, accuracy:%f' %
          (epoch, sum_loss / (i + 1), sum_accuracy / (i + 1)))
		
if __name__ == "__main__":
	
	# physical_devices = tf.config.experimental.list_physical_devices('GPU')
	# if physical_devices:
	# 	gpu0 = physical_devices[0]
	# 	tf.config.experimental.set_memory_growth(device=gpu0, enable=True)
	# 	tf.config.set_visible_devices([gpu0], "GPU")

	(x_train,y_train),(x_test,y_test) = datasets.cifar10.load_data()
	x_train = pic_agument(x_train)
	#x_train , x_test = color_normalize(x_train,x_test)
	
	train_data = tf.data.Dataset.from_tensor_slices((x_train,y_train)).shuffle(len(x_train)).batch(batch_size).map(process)
	test_data = tf.data.Dataset.from_tensor_slices((x_test,y_test)).batch(batch_size).map(process)
	
	img_input = Input(shape=(32,32,3))
	output = residual(img_input)
	model = models.Model(img_input, output)
	#model = VGG16()
	#model.summary()

	#learning_rate = 0.01 

	#optimizer = tf.keras.optimizers.Adam(0.1)  # the learning rate must be set small, otherwise the gradients will not update
	#optimizer = optimizers.SGD(lr=learning_rate, momentum=0.9, nesterov=True)
	learning_rate_schedules = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate)
	optimizer = optimizers.SGD(learning_rate=learning_rate_schedules, momentum=0.9, nesterov=True)

	for epoch in range(epoch_num) :
		start = time.time()
		train(model ,optimizer,train_data,epoch)
		test(model,test_data,epoch)
		end = time.time()
		print("循环运行时间:%.2f秒"%(end-start))
	
	model.save('/home/jimmy/Documents/models/resnet/', save_format='tf')
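
The saved model can be restored later for inference (the path is the author's; change it to your own). A minimal sketch:

import tensorflow as tf

# load_model restores the architecture and weights from the SavedModel
# directory written by model.save(...) above
restored = tf.keras.models.load_model('/home/jimmy/Documents/models/resnet/')
restored.summary()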
