第G8周:ACGAN任务

本周任务:
根据GAN、CGAN、SGAN及它们的框架图,写出ACGAN代码。

框架图

在这里插入图片描述
从图中可以看到,ACGAN的前半部分类似于CGAN,后半部分类似于SGAN,因此,代码前半部分模仿CGAN,后半部分模仿SGAN

配置代码

import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
import matplotlib.pyplot as plt
import datetime
import argparse

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128

这里先定义基本常量,作用相当于parser = argparse.ArgumentParser()

class Args:
    n_epochs = 200
    batch_size = 64
    lr = 0.0002
    b1 = 0.5
    b2 = 0.999
    n_cpu = 8
    latent_dim = 100
    img_size = 128 # 看图像类型
    channels = 3 # 看图像类型
    sample_interval = 400

opt = Args()
print(opt)
train_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])])

train_dataset = datasets.ImageFolder(root="F:/365data/G3/rps/", transform=train_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True,
                                           num_workers=6)
def show_images(images):
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid(images.detach(), nrow=22).permute(1, 2, 0))

def show_batch(dl):
    for images, _ in dl:
        show_images(images)
        break
image_shape = (3, 128, 128)
image_dim = int(np.prod(image_shape))
latent_dim = 100

n_classes = 3
embedding_dim = 100
# 自定义权重初始化函数,用于初始化生成器和判别器的权重
def weights_init(m):
    # 获取当前层的类名
    classname = m.__class__.__name__

    # 如果当前层是卷积层(类名中包含 'Conv' )
    if classname.find('Conv') != -1:
        # 使用正态分布随机初始化权重,均值为0,标准差为0.02
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    
    # 如果当前层是批归一化层(类名中包含 'BatchNorm' )
    elif classname.find('BatchNorm') != -1:
        # 使用正态分布随机初始化权重,均值为1,标准差为0.02
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        # 将偏置项初始化为全零
        torch.nn.init.zeros_(m.bias)
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        # 定义条件标签的生成器部分,用于将标签映射到嵌入空间中
        # n_classes:条件标签的总数
        # embedding_dim:嵌入空间的维度
        self.label_conditioned_generator = nn.Sequential(
            nn.Embedding(n_classes, embedding_dim),  # 使用Embedding层将条件标签映射为稠密向量
            nn.Linear(embedding_dim, 16)             # 使用线性层将稠密向量转换为更高维度
        )

        # 定义潜在向量的生成器部分,用于将噪声向量映射到图像空间中
        # latent_dim:潜在向量的维度
        self.latent = nn.Sequential(
            nn.Linear(latent_dim, 4*4*512),  # 使用线性层将潜在向量转换为更高维度
            nn.LeakyReLU(0.2, inplace=True)  # 使用LeakyReLU激活函数进行非线性映射
        )

        # 定义生成器的主要结构,将条件标签和潜在向量合并成生成的图像
        self.model = nn.Sequential(
            # 反卷积层1:将合并后的向量映射为64x8x8的特征图
            nn.ConvTranspose2d(513, 64*8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),  # 批标准化
            nn.ReLU(True),  # ReLU激活函数
            # 反卷积层2:将64x8x8的特征图映射为64x4x4的特征图
            nn.ConvTranspose2d(64*8, 64*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # 反卷积层3:将64x4x4的特征图映射为64x2x2的特征图
            nn.ConvTranspose2d(64*4, 64*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # 反卷积层4:将64x2x2的特征图映射为64x1x1的特征图
            nn.ConvTranspose2d(64*2, 64*1, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*1, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # 反卷积层5:将64x1x1的特征图映射为3x64x64的RGB图像
            nn.ConvTranspose2d(64*1, 3, 4, 2, 1, bias=False),
            nn.Tanh()  # 使用Tanh激活函数将生成的图像像素值映射到[-1, 1]范围内
        )

    def forward(self, inputs):
        noise_vector, label = inputs
        # 通过条件标签生成器将标签映射为嵌入向量
        label_output = self.label_conditioned_generator(label)
        # 将嵌入向量的形状变为(batch_size, 1, 4, 4),以便与潜在向量进行合并
        label_output = label_output.view(-1, 1, 4, 4)
        # 通过潜在向量生成器将噪声向量映射为潜在向量
        latent_output = self.latent(noise_vector)
        # 将潜在向量的形状变为(batch_size, 512, 4, 4),以便与条件标签进行合并
        latent_output = latent_output.view(-1, 512, 4, 4)
        
        # 将条件标签和潜在向量在通道维度上进行合并,得到合并后的特征图
        concat = torch.cat((latent_output, label_output), dim=1)
        # 通过生成器的主要结构将合并后的特征图生成为RGB图像
        image = self.model(concat)
        return image
generator = Generator().to(device)
generator.apply(weights_init)
print(generator)
from torchinfo import summary

summary(generator)
=================================================================
Layer (type:depth-idx)                   Param #
=================================================================
Generator                                --
├─Sequential: 1-1                        --
│    └─Embedding: 2-1                    300
│    └─Linear: 2-2                       1,616
├─Sequential: 1-2                        --
│    └─Linear: 2-3                       827,392
│    └─LeakyReLU: 2-4                    --
├─Sequential: 1-3                        --
│    └─ConvTranspose2d: 2-5              4,202,496
│    └─BatchNorm2d: 2-6                  1,024
│    └─ReLU: 2-7                         --
│    └─ConvTranspose2d: 2-8              2,097,152
│    └─BatchNorm2d: 2-9                  512
│    └─ReLU: 2-10                        --
│    └─ConvTranspose2d: 2-11             524,288
│    └─BatchNorm2d: 2-12                 256
│    └─ReLU: 2-13                        --
│    └─ConvTranspose2d: 2-14             131,072
│    └─BatchNorm2d: 2-15                 128
│    └─ReLU: 2-16                        --
│    └─ConvTranspose2d: 2-17             3,072
│    └─Tanh: 2-18                        --
=================================================================
Total params: 7,789,308
Trainable params: 7,789,308
Non-trainable params: 0
=================================================================

以上基本上是CGAN代码的前半部分,到生成器代码为止,以下为SGAN代码的部分

class Discriminator(nn.Module):
    def __init__(self, in_channels=3):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            """Returns layers of each discriminator block"""
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.conv_blocks = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        ds_size = opt.img_size // 2 ** 4

        # Output layers
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())
        self.aux_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, opt.num_classes + 1), nn.Softmax())

    def forward(self, img):
        out = self.conv_blocks(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)

        return validity, label
discriminator = Discriminator().to(device)
discriminator.apply(weights_init)
print(discriminator)
summary(discriminator)
adversarial_loss = nn.BCELoss() 
auxiliary_loss = torch.nn.CrossEntropyLoss()

def generator_loss(fake_output, label):
    gen_loss = adversarial_loss(fake_output, label)
    return gen_loss

def discriminator_loss(output, label):
    disc_loss = adversarial_loss(output, label)
    return disc_loss

cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor
learning_rate = 0.0002

optimizer_G = optim.Adam(generator.parameters(),     lr = learning_rate, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr = learning_rate, betas=(0.5, 0.999))
for epoch in range(opt.n_epochs):
    for i, (imgs, labels) in enumerate(train_loader):

        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
        fake_aux_gt = Variable(LongTensor(batch_size).fill_(opt.num_classes), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(FloatTensor))
        labels = Variable(labels.type(LongTensor))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, opt.latent_dim))))

        # Generate a batch of images
        gen_imgs = generator((z,labels))

        # Loss measures generator's ability to fool the discriminator
        validity, _ = discriminator(gen_imgs)
        g_loss = adversarial_loss(validity, valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Loss for real images
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels)) / 2

        # Loss for fake images
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, fake_aux_gt)) / 2

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        # Calculate discriminator accuracy
        pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        gt = np.concatenate([labels.data.cpu().numpy(), fake_aux_gt.data.cpu().numpy()], axis=0)
        d_acc = np.mean(np.argmax(pred, axis=1) == gt)

        d_loss.backward()
        optimizer_D.step()

        batches_done = epoch * len(train_loader) + i
        if batches_done % opt.sample_interval == 0:
            save_image(gen_imgs.data[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)

    print(
        "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%] [G loss: %f]"
        % (epoch, opt.n_epochs, i, len(train_loader), d_loss.item(), 100 * d_acc, g_loss.item())
    )

运行过程

e:\anaconda3\envs\PGPU\lib\site-packages\torch\nn\modules\container.py:139: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  input = module(input)
[Epoch 0/50] [Batch 19/20] [D loss: 1.455980, acc: 40%] [G loss: 0.490490]
[Epoch 1/50] [Batch 19/20] [D loss: 1.222127, acc: 72%] [G loss: 0.681366]
[Epoch 2/50] [Batch 19/20] [D loss: 1.224287, acc: 63%] [G loss: 0.916321]
[Epoch 3/50] [Batch 19/20] [D loss: 1.111225, acc: 70%] [G loss: 1.007028]
[Epoch 4/50] [Batch 19/20] [D loss: 1.184606, acc: 75%] [G loss: 0.696607]
[Epoch 5/50] [Batch 19/20] [D loss: 1.352154, acc: 55%] [G loss: 0.747507]
[Epoch 6/50] [Batch 19/20] [D loss: 1.403305, acc: 52%] [G loss: 0.869919]
[Epoch 7/50] [Batch 19/20] [D loss: 1.311451, acc: 50%] [G loss: 0.880048]
[Epoch 8/50] [Batch 19/20] [D loss: 1.413715, acc: 50%] [G loss: 0.674482]
[Epoch 9/50] [Batch 19/20] [D loss: 1.326531, acc: 54%] [G loss: 0.609503]
[Epoch 10/50] [Batch 19/20] [D loss: 1.449468, acc: 48%] [G loss: 0.620321]
[Epoch 11/50] [Batch 19/20] [D loss: 1.367987, acc: 53%] [G loss: 0.717428]
[Epoch 12/50] [Batch 19/20] [D loss: 1.286323, acc: 55%] [G loss: 0.748294]
[Epoch 13/50] [Batch 19/20] [D loss: 1.374772, acc: 51%] [G loss: 0.849943]
[Epoch 14/50] [Batch 19/20] [D loss: 1.303872, acc: 55%] [G loss: 0.887458]
[Epoch 15/50] [Batch 19/20] [D loss: 1.338245, acc: 59%] [G loss: 0.566128]
[Epoch 16/50] [Batch 19/20] [D loss: 1.386614, acc: 59%] [G loss: 0.737729]
[Epoch 17/50] [Batch 19/20] [D loss: 1.378518, acc: 55%] [G loss: 0.559435]
[Epoch 18/50] [Batch 19/20] [D loss: 1.421224, acc: 53%] [G loss: 0.639280]
[Epoch 19/50] [Batch 19/20] [D loss: 1.314460, acc: 54%] [G loss: 0.695454]
[Epoch 20/50] [Batch 19/20] [D loss: 1.279016, acc: 56%] [G loss: 0.810150]
[Epoch 21/50] [Batch 19/20] [D loss: 1.364004, acc: 53%] [G loss: 0.736294]
[Epoch 22/50] [Batch 19/20] [D loss: 1.364638, acc: 52%] [G loss: 0.990328]
[Epoch 23/50] [Batch 19/20] [D loss: 1.322828, acc: 53%] [G loss: 0.731904]
[Epoch 24/50] [Batch 19/20] [D loss: 1.317570, acc: 50%] [G loss: 0.839391]
[Epoch 25/50] [Batch 19/20] [D loss: 1.330042, acc: 55%] [G loss: 0.755845]
[Epoch 26/50] [Batch 19/20] [D loss: 1.354234, acc: 55%] [G loss: 0.652750]
[Epoch 27/50] [Batch 19/20] [D loss: 1.383858, acc: 55%] [G loss: 0.677340]
[Epoch 28/50] [Batch 19/20] [D loss: 1.384538, acc: 52%] [G loss: 0.621817]
[Epoch 29/50] [Batch 19/20] [D loss: 1.314232, acc: 54%] [G loss: 0.783550]
[Epoch 30/50] [Batch 19/20] [D loss: 1.328900, acc: 54%] [G loss: 0.709978]
[Epoch 31/50] [Batch 19/20] [D loss: 1.326728, acc: 54%] [G loss: 0.804180]
[Epoch 32/50] [Batch 19/20] [D loss: 1.346232, acc: 52%] [G loss: 0.775322]
[Epoch 33/50] [Batch 19/20] [D loss: 1.290386, acc: 56%] [G loss: 0.939839]
[Epoch 34/50] [Batch 19/20] [D loss: 1.395943, acc: 50%] [G loss: 0.582599]
[Epoch 35/50] [Batch 19/20] [D loss: 1.394045, acc: 52%] [G loss: 0.716685]
[Epoch 36/50] [Batch 19/20] [D loss: 1.391289, acc: 51%] [G loss: 0.747493]
[Epoch 37/50] [Batch 19/20] [D loss: 1.369082, acc: 50%] [G loss: 0.719075]
[Epoch 38/50] [Batch 19/20] [D loss: 1.401712, acc: 53%] [G loss: 0.645679]
[Epoch 39/50] [Batch 19/20] [D loss: 1.279735, acc: 57%] [G loss: 0.710965]
[Epoch 40/50] [Batch 19/20] [D loss: 1.363157, acc: 56%] [G loss: 0.589386]
[Epoch 41/50] [Batch 19/20] [D loss: 1.334075, acc: 53%] [G loss: 0.774654]
[Epoch 42/50] [Batch 19/20] [D loss: 1.358592, acc: 51%] [G loss: 0.726460]
[Epoch 43/50] [Batch 19/20] [D loss: 1.389814, acc: 50%] [G loss: 0.703020]
[Epoch 44/50] [Batch 19/20] [D loss: 1.363462, acc: 53%] [G loss: 0.691942]
[Epoch 45/50] [Batch 19/20] [D loss: 1.362092, acc: 55%] [G loss: 0.727146]
[Epoch 46/50] [Batch 19/20] [D loss: 1.360469, acc: 53%] [G loss: 0.696875]
[Epoch 47/50] [Batch 19/20] [D loss: 1.385563, acc: 52%] [G loss: 0.661834]
[Epoch 48/50] [Batch 19/20] [D loss: 1.376729, acc: 50%] [G loss: 0.753325]
[Epoch 49/50] [Batch 19/20] [D loss: 1.370506, acc: 51%] [G loss: 0.687326]

总结

  • 从框架图上看,ACGAN就是由CGAN和SGAN结合而来
  • 因此结合的代码也可以成功运行
  • 所以,ACGAN的输入为随机噪声z+条件信息C,而最终输出为真(真1、真2…) 或假也就是SGAN中的分类器功能
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值