Tensorflow2.x模型搭建的几种代码形式

一只想当程序员的Z

已于 2022-01-18 18:23:16 修改

阅读量1.5k

点赞数

文章标签： python tensorflow 开发语言深度学习

于 2022-01-18 12:34:06 首次发布

本文链接：https://blog.csdn.net/ETHAN112/article/details/122557447

版权

相信很多新手在刚开始学习时就想搭建自己的深度学习模型，但在看到风格各异的代码时，又容易把前向传播、反向传播和模型的搭建过程混淆。我总结了以下几种基于 TensorFlow 2.x 搭建模型的代码形式。

1.学习过程中最常见的形式：先对数据进行切片、载入并预处理，模型部分使用 Sequential 搭建。好处是方便简洁，但前向传播的顺序是固定的，且训练过程和优化过程都调用了现成的 API。

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics,losses



def prrocess(x,y):
    """Preprocess one (image, label) pair for the input pipeline.

    The image is cast to float32 and divided by 255; the label is cast to
    int32. Returns the converted pair as a tuple ``(x, y)``.
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255
    label = tf.cast(y, dtype=tf.int32)
    return scaled, label


# Load the Fashion-MNIST train/test splits.
(x,y),(x_test,y_test) = datasets.fashion_mnist.load_data()
print(x.shape,y.shape)
batchsz = 128

# Training pipeline: per-sample preprocessing, then shuffle and batch.
db = tf.data.Dataset.from_tensor_slices((x,y))
db = db.map(prrocess).shuffle(10000).batch(batchsz)

# Test pipeline: it has to be derived from the test slices themselves so that
# evaluation runs on held-out data. No shuffle is applied because accuracy
# does not depend on sample order.
db_test = tf.data.Dataset.from_tensor_slices((x_test,y_test))
db_test = db_test.map(prrocess).batch(batchsz)

# Pull a single training batch so its contents can be inspected.
db_iter = iter(db)
sample = next(db_iter)

# Five-layer fully connected classifier; the last layer emits 10 raw logits
# (no activation), which is why the loss below is used with from_logits=True.
model = Sequential([
    layers.Dense(256,activation=tf.nn.relu),
    layers.Dense(128,activation=tf.nn.relu),
    layers.Dense(64,activation=tf.nn.relu),
    layers.Dense(32,activation=tf.nn.relu),
    layers.Dense(10)
])
# build() creates the weights for flattened 28*28 inputs so that summary()
# can list the layers and their parameter counts.
model.build(input_shape=[None,28*28])
model.summary()
# `learning_rate` is the supported keyword name for the step size.
optimizer = optimizers.Adam(learning_rate=1e-3)

def main():
    """Train ``model`` on ``db`` for 30 epochs and report test accuracy.

    Each training step flattens the batch to [b, 784], computes the
    cross-entropy loss on the logits, and applies the gradients with the
    module-level ``optimizer``. An MSE value is computed alongside purely for
    logging. After every epoch the model is evaluated on ``db_test`` and the
    accuracy is printed.
    """
    for epoch in range(30):
        for step,(x,y) in enumerate(db):
            # x -> [b,784]
            x = tf.reshape(x,[-1,28*28])
            with tf.GradientTape() as tape:
                logits = model(x)
                y_onehot = tf.one_hot(y,depth=10)
                # MSE is only logged; cross-entropy is the loss being optimised.
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot,logits))
                loss_ce = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot,logits,from_logits=True))

            grads = tape.gradient(loss_ce,model.trainable_variables)
            # zip pairs every gradient with the variable it belongs to.
            optimizer.apply_gradients(zip(grads,model.trainable_variables))

            if step % 100 ==0:
                print(epoch,step,'loss:',float(loss_ce),float(loss_mse))

        # Evaluation pass: count correct predictions over the whole test set.
        total_correct=0
        total_num=0
        for x,y in db_test:
            x = tf.reshape(x,[-1,28*28])
            logits = model(x)
            prob = tf.nn.softmax(logits,axis=1)
            pred = tf.argmax(prob,axis=1)
            # argmax yields int64 by default; cast so it compares with int32 labels.
            pred = tf.cast(pred,dtype=tf.int32)
            correct = tf.equal(pred,y)
            correct = tf.reduce_sum(tf.cast(correct,dtype=tf.int32))

            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct/total_num
        print(epoch,'test acc:',acc)

2.在这种代码类型中，没有调用高层API，而是从最底层的w1、w2……每个神经元的参数和计算方式着手，在梯度计算和更新中都能代入自己的损失函数和优化过程。

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets,metrics
# Load the MNIST train/validation splits (author's note: training images are
# [60k, 28, 28]).
(x, y), (x_val, y_val) = datasets.mnist.load_data()

# Images become float32 tensors divided by 255; labels become int32 tensors.
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
x_val = tf.convert_to_tensor(x_val, dtype=tf.float32) / 255
y = tf.convert_to_tensor(y, dtype=tf.int32)
y_val = tf.convert_to_tensor(y_val, dtype=tf.int32)
print(x.shape, y.shape)

# Print the largest pixel value after scaling.
print(tf.reduce_max(x))

# Slice both splits into batches of 128 samples.
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(128)
# Uncomment to inspect a single training batch:
# train_iter = iter(train_db)
# sample = next(train_iter)
# print('batch=',sample[0].shape)


# Trainable parameters of a 784 -> 256 -> 128 -> 10 fully connected network.
# Weights are sampled from a truncated normal with stddev 0.1 (the author's
# note says the small spread is meant to avoid exploding gradients); biases
# start at zero.
w1 = tf.Variable(tf.random.truncated_normal(shape=[784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[256]))
w2 = tf.Variable(tf.random.truncated_normal(shape=[256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros(shape=[128]))
w3 = tf.Variable(tf.random.truncated_normal(shape=[128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros(shape=[10]))


lr = 1e-3
acc_meter = metrics.Accuracy()  # accumulates accuracy between two log lines
# One pass over the training set with hand-written forward pass and SGD update.
for step,(x,y) in enumerate(train_db):
    with tf.GradientTape() as tape:
        # Flatten each image to a 784-vector, then run the three layers.
        x = tf.reshape(x, [-1, 28 * 28])
        h1 = x @ w1 + b1
        h1 = tf.nn.relu(h1)
        h2 = h1 @ w2 + b2
        h2 = tf.nn.relu(h2)
        h3 = h2 @ w3 + b3
        h3 = tf.nn.softmax(h3)

        # Squared error between the one-hot labels and the softmax output.
        y_onehot = tf.one_hot(y, depth=10)
        loss = tf.square(y_onehot - h3)
        # NOTE(review): reduce_mean already averages over the batch, so the
        # extra division by 128 only shrinks the gradient further - confirm
        # this scaling is intended.
        loss = tf.reduce_mean(loss) / 128
    grads = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])
    # Plain gradient descent: param <- param - lr * grad, updated in place so
    # the tf.Variable objects stay tracked by the tape.
    w1.assign_sub(lr * grads[0])
    b1.assign_sub(lr * grads[1])
    w2.assign_sub(lr * grads[2])
    b2.assign_sub(lr * grads[3])
    w3.assign_sub(lr * grads[4])
    b3.assign_sub(lr * grads[5])
    # update_state takes (y_true, y_pred).
    acc_meter.update_state(y, tf.argmax(h3, axis=1))


    if step % 100 ==0:
        # Report the accumulated accuracy before clearing the meter.
        print(step,'loss:',float(loss),'acc:',float(acc_meter.result()))
        acc_meter.reset_states()



# Validation: run the trained parameters over the test set and track accuracy.
total_correct ,total_num = 0,0
for x, y in test_db:

    x = tf.reshape(x,[-1,28 * 28])

    # Forward pass on the current batch; every layer consumes the activation
    # computed just above it.
    h1 = tf.nn.relu(x@w1 + b1)
    h2 = tf.nn.relu(h1@w2 + b2)
    print(h2.shape)
    out = (h2 @ w3 + b3)
    print(out.shape)
    prob = tf.nn.softmax(out,axis=1)
    print(prob.shape)
    pred = tf.argmax(prob,axis=1)
    print(pred.dtype,y.dtype)
    # argmax yields int64 by default; cast so it compares with int32 labels.
    pred = tf.cast(pred,dtype=tf.int32)
    print(pred.dtype, y.dtype,pred.shape,y.shape)
    correct = tf.cast(tf.equal(pred,y),dtype=tf.int32)
    correct = tf.reduce_sum(correct)

    total_correct +=int(correct)
    total_num += x.shape[0]
    # Running accuracy over all batches seen so far.
    acc = total_correct/total_num
    print('test acc ',acc)

3.在搭建更深层的网络时，我们可以将多次重复使用的部分创建为一个class，调用tensorflow里的layers，在class的初始化方法（__init__）中写入网络的构建，在call中写入前向传播过程。这是比较常用的类型。如下为GAN网络中判别器的代码。

class Discriminator(keras.Model):
    """Convolutional discriminator that maps an image batch to one score each.

    Three 5x5 convolutions with stride 3 and 'valid' padding (64, 128 and 256
    filters) are followed by a flatten and a single-unit dense layer. The
    second and third convolutions are batch-normalised; all three use leaky
    ReLU. Per the author's note the intended mapping is
    [b, 64, 64, 3] => [b, 1].
    """

    def __init__(self):
        super().__init__()

        # Layers are created in the order they are applied in call().
        self.conv1 = layers.Conv2D(filters=64, kernel_size=5, strides=3, padding='valid')

        self.conv2 = layers.Conv2D(filters=128, kernel_size=5, strides=3, padding='valid')
        self.bn2 = layers.BatchNormalization()

        self.conv3 = layers.Conv2D(filters=256, kernel_size=5, strides=3, padding='valid')
        self.bn3 = layers.BatchNormalization()

        self.flatten = layers.Flatten()
        self.fc = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass and return the raw (unactivated) dense output.

        ``training`` is forwarded to the batch-normalisation layers.
        """
        # Stage 1: convolution + leaky ReLU (no batch norm on the first stage).
        features = tf.nn.leaky_relu(self.conv1(inputs))

        # Stage 2: convolution -> batch norm -> leaky ReLU.
        features = self.bn2(self.conv2(features), training=training)
        features = tf.nn.leaky_relu(features)

        # Stage 3: convolution -> batch norm -> leaky ReLU.
        features = self.bn3(self.conv3(features), training=training)
        features = tf.nn.leaky_relu(features)

        # Head: flatten and project to a single value per sample.
        return self.fc(self.flatten(features))