Generative Model Application: Controlling Face Attributes with a Variational Autoencoder (VAE) to Generate Face Images

This post walks through image generation with a Variational Autoencoder (VAE) on the CelebA dataset: data preprocessing, the model architecture (Encoder and Decoder), the loss function, and an exploration of the latent space. It then shows how to manipulate latent variables with attribute vectors to control the faces the model generates.
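As standard VAE background (my summary, not text from the original post): training minimizes a reconstruction term plus a weighted KL term that pulls the approximate posterior toward a standard normal prior. With the settings used below (MSE reconstruction and `kl_weight_const = 0.01`):

$$
\mathcal{L}(x) = \underbrace{\lVert x - \hat{x} \rVert^{2}}_{\texttt{vae\_rc\_loss}} + 0.01 \cdot \underbrace{D_{\mathrm{KL}}\!\big(\mathcal{N}(\mu(x), \sigma^{2}(x)) \,\big\Vert\, \mathcal{N}(0, I)\big)}_{\texttt{vae\_kl\_loss}}
$$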

Data pipeline: each CelebA sample is resized to 112×112, scaled to [0, 1], and returned twice, as both input and reconstruction target. The imports and the `preprocess` header below are reconstructed from the attribute-preprocessing function shown later in this post:

```python
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import (Layer, Conv2D, BatchNormalization, LeakyReLU,
                                     UpSampling2D, Flatten, Dense, Reshape)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def preprocess(sample):
    # Header and resize/scale steps mirror preprocess_attrib() further down.
    image = tf.image.resize(sample['image'], [112, 112])
    image = tf.cast(image, tf.float32) / 255.
    return image, image  # input and target are the same image (autoencoding)

# ds_train / ds_test_ are the raw CelebA splits, and batch_size / ds_info come
# from an earlier tfds.load('celeb_a') call not shown in this excerpt.
ds_train = ds_train.map(preprocess)
ds_train = ds_train.shuffle(128)
ds_train = ds_train.batch(batch_size, drop_remainder=True).prefetch(batch_size)
ds_test = ds_test_.map(preprocess).batch(batch_size, drop_remainder=True).prefetch(batch_size)

train_num = ds_info.splits['train'].num_examples
test_num = ds_info.splits['test'].num_examples
```

```python
class GaussianSampling(Layer):
    def call(self, inputs):
        means, logvar = inputs
        epsilon = tf.random.normal(shape=tf.shape(means), mean=0., stddev=1.)
        samples = means + tf.exp(0.5 * logvar) * epsilon
        return samples
```
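This layer is the reparameterization trick: instead of sampling z directly (which is not differentiable), it samples noise and transforms it with the encoder's outputs,

$$
z = \mu + \exp\!\left(\tfrac{1}{2}\log\sigma^{2}\right)\odot\epsilon, \qquad \epsilon \sim \mathcal{N}(0, I)
$$

so gradients can flow through μ and log σ² while the randomness stays in ε.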

```python
class DownConvBlock(Layer):
    count = 0  # class-level counter used to give each block a unique name

    def __init__(self, filters, kernel_size=(3, 3), strides=1, padding='same'):
        super(DownConvBlock, self).__init__(name=f"DownConvBlock_{DownConvBlock.count}")
        DownConvBlock.count += 1
        self.forward = Sequential([
            Conv2D(filters, kernel_size, strides, padding),
            BatchNormalization(),
            LeakyReLU(0.2)
        ])

    def call(self, inputs):
        return self.forward(inputs)
```

```python
class UpConvBlock(Layer):
    count = 0

    def __init__(self, filters, kernel_size=(3, 3), padding='same'):
        super(UpConvBlock, self).__init__(name=f"UpConvBlock_{UpConvBlock.count}")
        UpConvBlock.count += 1
        self.forward = Sequential([
            Conv2D(filters, kernel_size, 1, padding),
            LeakyReLU(0.2),
            UpSampling2D((2, 2))
        ])

    def call(self, inputs):
        return self.forward(inputs)
```

```python
class Encoder(Layer):
    def __init__(self, z_dim, name='encoder'):
        super(Encoder, self).__init__(name=name)
        self.features_extract = Sequential([
            DownConvBlock(filters=32, kernel_size=(3, 3), strides=2),
            DownConvBlock(filters=32, kernel_size=(3, 3), strides=2),
            DownConvBlock(filters=64, kernel_size=(3, 3), strides=2),
            DownConvBlock(filters=64, kernel_size=(3, 3), strides=2),
            Flatten()
        ])
        self.dense_mean = Dense(z_dim, name='mean')
        self.dense_logvar = Dense(z_dim, name='logvar')
        self.sampler = GaussianSampling()

    def call(self, inputs):
        x = self.features_extract(inputs)
        mean = self.dense_mean(x)
        logvar = self.dense_logvar(x)
        z = self.sampler([mean, logvar])
        return z, mean, logvar
```

```python
class Decoder(Layer):
    def __init__(self, z_dim, name='decoder'):
        super(Decoder, self).__init__(name=name)
        self.forward = Sequential([
            Dense(7 * 7 * 64, activation='relu'),  # 3136 units, matches the Reshape below
            Reshape((7, 7, 64)),
            UpConvBlock(filters=64, kernel_size=(3, 3)),
            UpConvBlock(filters=64, kernel_size=(3, 3)),
            UpConvBlock(filters=32, kernel_size=(3, 3)),
            UpConvBlock(filters=32, kernel_size=(3, 3)),
            Conv2D(filters=3, kernel_size=(3, 3), strides=1, padding='same',
                   activation='sigmoid')
        ])

    def call(self, inputs):
        return self.forward(inputs)
```

```python
class VAE(Model):
    def __init__(self, z_dim, name='VAE'):
        super(VAE, self).__init__(name=name)
        self.encoder = Encoder(z_dim)
        self.decoder = Decoder(z_dim)
        # Stash the latest posterior parameters so the loss functions can read them.
        self.mean = None
        self.logvar = None

    def call(self, inputs):
        z, self.mean, self.logvar = self.encoder(inputs)
        out = self.decoder(z)
        return out
```
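As a quick sanity check (a sketch of my own, not from the original post; `probe_vae` and `dummy` are hypothetical names): the encoder's four stride-2 blocks take a 112×112×3 input down to 7×7 feature maps, and the decoder's four upsampling blocks mirror it back up:

```python
probe_vae = VAE(z_dim=200)                # throwaway instance for shape checking
dummy = tf.zeros((1, 112, 112, 3))
print(probe_vae(dummy).shape)             # (1, 112, 112, 3)
z, mean, logvar = probe_vae.encoder(dummy)
print(z.shape, mean.shape, logvar.shape)  # (1, 200) each
```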

```python
# num_devices / strategy come from the distribution-strategy setup (not shown here).
if num_devices > 1:
    with strategy.scope():
        vae = VAE(z_dim=200)
else:
    vae = VAE(z_dim=200)
```

```python
def vae_kl_loss(y_true, y_pred):
    kl_loss = -0.5 * tf.reduce_mean(
        1 + vae.logvar - tf.square(vae.mean) - tf.exp(vae.logvar))
    return kl_loss

def vae_rc_loss(y_true, y_pred):
    rc_loss = tf.keras.losses.MSE(y_true, y_pred)
    return rc_loss

def vae_loss(y_true, y_pred):
    kl_loss = vae_kl_loss(y_true, y_pred)
    rc_loss = vae_rc_loss(y_true, y_pred)
    kl_weight_const = 0.01
    return kl_weight_const * kl_loss + rc_loss
```
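`vae_kl_loss` is the closed-form KL divergence between a diagonal Gaussian and the standard normal:

$$
D_{\mathrm{KL}}\big(\mathcal{N}(\mu,\sigma^{2})\,\big\Vert\,\mathcal{N}(0,I)\big) = -\frac{1}{2}\sum_{d=1}^{D}\left(1 + \log\sigma_d^{2} - \mu_d^{2} - \sigma_d^{2}\right)
$$

Note the code uses `tf.reduce_mean`, averaging over both the batch and the latent dimensions rather than summing over dimensions; the resulting 1/D factor is effectively absorbed into `kl_weight_const`.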

```python
model_path = "vae_faces_cele_a.h5"
checkpoint = ModelCheckpoint(
    model_path,
    monitor='vae_rc_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
    save_weights_only=True
)
early = EarlyStopping(
    monitor='vae_rc_loss',
    mode='auto',
    patience=3
)
callbacks_list = [checkpoint, early]
```

```python
initial_learning_rate = 1e-3
steps_per_epoch = int(np.round(train_num / batch_size))
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=steps_per_epoch,
    decay_rate=0.96,
    staircase=True
)
```
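With `staircase=True` and `decay_steps` equal to one epoch's worth of steps, `ExponentialDecay` drops the learning rate once per epoch rather than continuously:

$$
\eta(s) = 10^{-3} \cdot 0.96^{\,\lfloor s \,/\, \text{steps-per-epoch} \rfloor}
$$

where s is the global training step.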

```python
vae.compile(
    loss=[vae_loss],
    # Note: a fixed learning rate is used here; pass lr_schedule as
    # learning_rate instead to apply the exponential decay defined above.
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=3e-3),
    metrics=[vae_kl_loss, vae_rc_loss]
)
```

```python
history = vae.fit(ds_train, validation_data=ds_test, epochs=50, callbacks=callbacks_list)

images, labels = next(iter(ds_train))
vae.load_weights(model_path)
outputs = vae.predict(images)
```

Display the original images (top row) alongside their reconstructions (bottom row):

```python
grid_col = 8
grid_row = 2
f, axarr = plt.subplots(grid_row, grid_col, figsize=(grid_col * 2, grid_row * 2))
i = 0
for row in range(0, grid_row, 2):
    for col in range(grid_col):
        axarr[row, col].imshow(images[i])
        axarr[row, col].axis('off')
        axarr[row + 1, col].imshow(outputs[i])
        axarr[row + 1, col].axis('off')
        i += 1
f.tight_layout(pad=0.1, h_pad=0.2, w_pad=0.1)
plt.show()
```

```python
# Estimate the per-dimension mean and standard deviation of the encoded
# training data; the iterator is created once so each step reads a new batch.
avg_z_mean = []
avg_z_std = []
train_iter = iter(ds_train)
for i in range(steps_per_epoch):
    images, labels = next(train_iter)
    z, z_mean, z_logvar = vae.encoder(images)
    avg_z_mean.append(np.mean(z_mean, axis=0))
    avg_z_std.append(np.mean(np.exp(0.5 * z_logvar), axis=0))
avg_z_mean = np.mean(avg_z_mean, axis=0)
avg_z_std = np.mean(avg_z_std, axis=0)

plt.plot(avg_z_mean)
plt.ylabel("Average z mean")
plt.xlabel("z dimension")
plt.show()
```

```python
grid_col = 10
grid_row = 10
f, axarr = plt.subplots(grid_row, grid_col, figsize=(grid_col, 1.5 * grid_row))
i = 0
for row in range(grid_row):
    for col in range(grid_col):
        axarr[row, col].hist(z[:, i], bins=20)
        axarr[row, col].axis('off')
        i += 1
#f.tight_layout(pad=0.1, h_pad=0.2, w_pad=0.1)
plt.show()
```

```python
z_dim = 200
z_samples = np.random.normal(loc=0, scale=1, size=(25, z_dim))
images = vae.decoder(z_samples.astype(np.float32))

grid_col = 7
grid_row = 2
f, axarr = plt.subplots(grid_row, grid_col, figsize=(2 * grid_col, 2 * grid_row))
i = 0
for row in range(grid_row):
    for col in range(grid_col):
        axarr[row, col].imshow(images[i])
        axarr[row, col].axis('off')
        i += 1
f.tight_layout(pad=0.1, h_pad=0.2, w_pad=0.1)
plt.show()
```

Sampling trick

Draws from a plain N(0, I) prior often land outside the region the encoder actually uses, so instead we scale the noise by the average posterior standard deviation and shift it by the avg_z_mean measured above:

```python
z_samples = np.random.normal(loc=0., scale=np.mean(avg_z_std), size=(25, z_dim))
z_samples += avg_z_mean
images = vae.decoder(z_samples.astype(np.float32))

grid_col = 7
grid_row = 2
f, axarr = plt.subplots(grid_row, grid_col, figsize=(2 * grid_col, 2 * grid_row))
i = 0
for row in range(grid_row):
    for col in range(grid_col):
        axarr[row, col].imshow(images[i])
        axarr[row, col].axis('off')
        i += 1
f.tight_layout(pad=0.1, h_pad=0.2, w_pad=0.1)
plt.show()
```

```python
(ds_train, ds_test), ds_info = tfds.load(
    'celeb_a',
    split=['train', 'test'],
    shuffle_files=True,
    with_info=True)
test_num = ds_info.splits['test'].num_examples

def preprocess_attrib(sample, attribute):
    image = sample['image']
    image = tf.image.resize(image, [112, 112])
    image = tf.cast(image, tf.float32) / 255.
    return image, sample['attributes'][attribute]
```

```python
def extract_attrib_vector(attribute, ds):
    # Attribute vector = weighted mean z of positive samples minus that of
    # negative samples, accumulated over the test split.
    batch_size = 32 * num_devices
    ds = ds.map(lambda x: preprocess_attrib(x, attribute))
    ds = ds.batch(batch_size)
    steps_per_epoch = int(np.round(test_num / batch_size))
    pos_z, pos_z_num = [], []
    neg_z, neg_z_num = [], []
    ds_iter = iter(ds)  # single iterator so each step consumes a new batch
    for i in range(steps_per_epoch):
        images, labels = next(ds_iter)
        z, z_mean, z_logvar = vae.encoder(images)
        z = z.numpy()
        labels = labels.numpy()
        step_pos_z = z[labels == True]
        pos_z.append(np.mean(step_pos_z, axis=0))
        pos_z_num.append(step_pos_z.shape[0])
        step_neg_z = z[labels == False]
        neg_z.append(np.mean(step_neg_z, axis=0))
        neg_z_num.append(step_neg_z.shape[0])
    avg_pos_z = np.average(pos_z, axis=0, weights=pos_z_num)
    avg_neg_z = np.average(neg_z, axis=0, weights=neg_z_num)
    # Cast to float32 so the vector can be added to the encoder's float32 latents.
    attrib_vector = (avg_pos_z - avg_neg_z).astype(np.float32)
    return attrib_vector
```
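In other words, the attribute direction is the difference between the weighted mean latent codes of positive and negative examples:

$$
v_{\text{attrib}} = \bar{z}_{+} - \bar{z}_{-}, \qquad \bar{z}_{\pm} = \frac{1}{N_{\pm}} \sum_{x:\,y(x)=\pm} z(x)
$$

Adding a multiple of this vector to an encoded face strengthens the attribute; subtracting it weakens it.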

```python
attributes = list(ds_info.features['attributes'].keys())
attribs_vectors = {}
for attrib in attributes:
    print(attrib)
    attribs_vectors[attrib] = extract_attrib_vector(attrib, ds_test)
```

```python
def explore_latent_variable(image, attrib):
    grid_col = 8
    grid_row = 1
    z_samples, _, _ = vae.encoder(tf.expand_dims(image, 0))
    f, axarr = plt.subplots(grid_row, grid_col, figsize=(2 * grid_col, 2 * grid_row))
    axarr[0].imshow(image)  # leftmost panel: the original image
    axarr[0].axis('off')
    step = -3
    for col in range(1, grid_col):
        # Shift the latent code along the attribute direction, from -3 to +3.
        new_z_samples = z_samples + step * attribs_vectors[attrib]
        reconstructed_image = vae.decoder(new_z_samples)
        step += 1
        axarr[col].imshow(reconstructed_image[0])
        axarr[col].axis('off')
    f.tight_layout(pad=0.1, h_pad=0.2, w_pad=0.1)
    plt.show()
```

```python
ds_test1 = ds_test.map(preprocess).batch(100)
images, labels = next(iter(ds_test1))
```

Controlling attribute vectors to generate face images

With the attribute vectors in hand, we can encode a test face, move its latent code along a chosen attribute direction, and decode the result.
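For example (my own invocation sketch; 'Smiling' and 'Eyeglasses' are two of the 40 CelebA attribute keys):

```python
# Vary the 'Smiling' attribute on the first test image, from -3 to +3.
explore_latent_variable(images[0], 'Smiling')

# Any other attribute key from attribs_vectors works the same way:
explore_latent_variable(images[1], 'Eyeglasses')
```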
