(2) CGAN in Practice: Conditionally Generating MNIST Handwritten Digits

I. Key Points from the CGAN Paper
  1. Objective function of a vanilla GAN:

    $$\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{\mathrm{data}}(x)}[\log D(x)] + \mathbb{E}_{z \sim p_z(z)}[\log(1 - D(G(z)))]$$

    The CGAN objective conditions both terms on extra information y (here, the class label); in code these two expectations reduce to binary cross-entropy terms, as sketched after this list:

    $$\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{\mathrm{data}}(x)}[\log D(x \mid y)] + \mathbb{E}_{z \sim p_z(z)}[\log(1 - D(G(z \mid y)))]$$

  2. Example CGAN network structure:
    [Figure: the condition y is fed both to the generator (alongside the noise z) and to the discriminator (alongside the image x).]
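Here is a minimal, self-contained sketch of how those expectations map to BCE losses. The tiny G and D below are hypothetical stand-ins conditioned via one-hot labels, just to make the loss arithmetic concrete; the real models later in this post use nn.Embedding instead.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ToyG(nn.Module):
    def __init__(self):
        super(ToyG, self).__init__()
        self.fc = nn.Linear(100 + 10, 1 * 32 * 32)
    def forward(self, z, y):
        h = torch.cat([z, F.one_hot(y, 10).float()], dim=1)  # condition on y
        return torch.tanh(self.fc(h)).view(-1, 1, 32, 32)

class ToyD(nn.Module):
    def __init__(self):
        super(ToyD, self).__init__()
        self.fc = nn.Linear(1 * 32 * 32 + 10, 1)
    def forward(self, x, y):
        h = torch.cat([x.view(x.size(0), -1), F.one_hot(y, 10).float()], dim=1)
        return torch.sigmoid(self.fc(h))

G, D = ToyG(), ToyD()
criterion = nn.BCELoss()
x = torch.randn(4, 1, 32, 32)              # stand-in "real" batch
y = torch.randint(0, 10, (4,))             # its labels
z = torch.randn(4, 100)                    # latent noise
ones, zeros = torch.ones(4, 1), torch.zeros(4, 1)

# D ascends log D(x|y) + log(1 - D(G(z|y))): as BCE, push reals to 1, fakes to 0
d_loss = criterion(D(x, y), ones) + criterion(D(G(z, y).detach(), y), zeros)
# Non-saturating G objective: push D's score on fakes toward 1
g_loss = criterion(D(G(z, y), y), ones)
print(d_loss.item(), g_loss.item())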
II. The Idea Behind the MNIST Implementation

My approach is as follows:
  First train the discriminator: feed it each real handwritten-digit image (32×32) together with its label (passed through an embedding layer that maps the digit's label to a 10-dim vector) as the real input; then feed a random latent vector (100-dim) plus a random label into the generator to produce a fake image, and pass that fake image together with the same label to the discriminator as the fake input for it to score. Then train the generator: feed a random latent vector (100-dim) plus a random label into the generator to produce an image, and hand that image together with the label to the discriminator for scoring. A quick shape check of this conditioning step follows below.
  Some MNIST samples, already preprocessed (reshaped) to 32×32:
  [Figure: grid of 32×32 MNIST sample digits.]
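Before the full listing, a quick shape check of the conditioning step (standard PyTorch; the sizes match the models below):

import torch
import torch.nn as nn

embedding = nn.Embedding(num_embeddings=10, embedding_dim=10)
noise = torch.randn(128, 100)            # a batch of 100-dim latent vectors
labels = torch.randint(0, 10, (128,))    # a batch of random digit labels
gen_input = torch.cat([embedding(labels), noise], dim=-1)
print(gen_input.shape)                   # torch.Size([128, 110])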

III. Complete Code (with Detailed Comments)
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import matplotlib.animation as animation
from IPython.display import HTML
from torch.autograd import Variable
import skimage.io as io

parser = argparse.ArgumentParser()
parser.add_argument('--dataroot',default='../dataset', help='root path for dataset')
parser.add_argument('--epochs', type = int, default=200, help='number of epochs of training')
parser.add_argument('--batch_size', type = int, default=128, help='size of the batches')
parser.add_argument('--lr', type = float, default=0.0002, help='Adam: learning rate')
parser.add_argument('--beta1', type = float, default=0.5, help='adam: decay of first order momentum of gradient')
parser.add_argument('--beta2', type=float, default=0.999, help='adam: decay of second order momentum of gradient')
parser.add_argument('--n_gpu', type = int, default=1, help='number of GPUs to use')
parser.add_argument('--latent_dim', type = int, default=100, help='dimensionality of the latent space')
parser.add_argument('--n_classes', type = int, default=10, help='number of classes for dataset')
parser.add_argument('--img_size', type=int, default=32, help='size of each image dimension')
parser.add_argument('--channels', type = int, default=1, help='number of image channels')
parser.add_argument('--sample_interval', type=int, default=400, help='interval between image sampling')
args = parser.parse_args(args=[])
print(args)
img_shape = (args.channels, args.img_size, args.img_size)
np.random.seed(999)
torch.manual_seed(999)
device = torch.device('cuda:0' if (torch.cuda.is_available() and args.n_gpu > 0) else 'cpu')

# create the dataset
transform = transforms.Compose([
    transforms.Resize(args.img_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # single value each for mean/std: MNIST is single-channel
])
dataset = torchvision.datasets.MNIST(root = args.dataroot, train=True, transform=transform, download=False)
dataloader = DataLoader(dataset = dataset, batch_size=args.batch_size, shuffle = True, num_workers=2)

# inspect a few images
# sample = next(iter(dataloader))
# plt.figure(figsize = (8, 8))
# plt.axis('off')
# plt.title('MNIST SAMPLE')

# plt.imshow(np.transpose(torchvision.utils.make_grid(sample[0][:64], normalize = True), (1,2,0)))

# ======================
#      Generator
#=======================
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # num_embeddings is the vocabulary size; embedding_dim maps each label to a 10-dim vector
        self.embedding = nn.Embedding(num_embeddings = args.n_classes, embedding_dim = 10)
        self.model = nn.Sequential(
            # the input has 100 + 10 features: 100 is the latent vector length,
            # 10 is the dimensionality of the label after the embedding layer
            nn.Linear(args.latent_dim+10, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(0.2, inplace=True),
            # np.prod() multiplies the array elements (here 1*32*32 = 1024, the flattened image size);
            # axis selects the direction (1 = per row), default is the product of all elements
            nn.Linear(1024, int(np.prod(img_shape))),
            nn.Tanh()
        )
    def forward(self, noise, labels):
        # Concatenate label embedding and noise to produce the input
        # labels must be a LongTensor first: nn.Embedding only accepts LongTensor input
        labels = self.embedding(labels)
        labels = labels.view(labels.size(0), -1)
        gen_input = torch.cat((labels, noise.view(noise.size(0), -1)), dim = -1) # --> [batch_size, 110]
        img = self.model(gen_input)  # --> [batch_size, 1024]
        img = img.view(img.size(0), *img_shape) # --> [batch_size, 1, 32, 32]; 32*32 = 1024
        return img

# ======================
#    Discriminator
#=======================
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=args.n_classes, embedding_dim=10)
        self.model = nn.Sequential(
            nn.Linear(int(np.prod(img_shape)) + 10, 512),
            nn.Dropout(p = 0.5, inplace=True),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(512, 256),
            nn.Dropout(p = 0.5, inplace=True),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(256, 128),
            nn.Dropout(p = 0.5, inplace=True),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2, inplace=True),
            
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
    def forward(self, img, labels):
        # Concatenate label embedding and image to produce input
        labels = self.embedding(labels)
        labels = labels.view(labels.size(0), -1)
        d_input = torch.cat((img.view(img.size(0), -1), labels) ,dim = -1) # --> [batch_size, 1034]
        result = self.model(d_input)
        return result
netG = Generator().to(device)
netD = Discriminator().to(device)
print(netG)
print(netD)
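# Optional sanity check (not part of the training path; assuming a batch of 8):
# G should map (8, 100) noise plus (8,) labels to (8, 1, 32, 32) images,
# and D should map that image batch back to (8, 1) probabilities.
_z = torch.randn(8, args.latent_dim, device = device)
_y = torch.randint(0, args.n_classes, (8,), device = device)
print(netG(_z, _y).shape)            # torch.Size([8, 1, 32, 32])
print(netD(netG(_z, _y), _y).shape)  # torch.Size([8, 1])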
# show the network structure in tensorboard
writer = SummaryWriter('runs/CGAN/discriminator')
noise = torch.randn(128, args.latent_dim, 1, 1, device = device)
noise_with_labels = torch.randint(0, args.n_classes, size = (128, 1, 1), device = device).long()  # embedding needs LongTensor
dataiter = iter(dataloader)
images, labels = next(dataiter)
labels = labels.to(device).long()
images = images.to(device)
with writer as w:
#     w.add_graph(netG, (noise, noise_with_labels))
    w.add_graph(netD, (images, labels)) # only the first graph stored under a folder is displayed,
                                        # so the generator and discriminator graphs are saved under
                                        # separate generator and discriminator folders


real = 1.0
fake = 0.0
criterion = nn.BCELoss()
# Optimizers
optimizer_G = torch.optim.Adam(netG.parameters(), lr = args.lr, betas=(args.beta1, args.beta2))
optimizer_D = torch.optim.Adam(netD.parameters(), lr = args.lr, betas=(args.beta1, args.beta2))
# output folders for sampled images and checkpoints
os.makedirs('Picture', exist_ok=True)
os.makedirs('Model', exist_ok=True)
img_list = []  # grids of generated samples, collected every 5 epochs

print("Starting Training Loop...")
# ----------
#  Training
# ----------
for epoch in range(args.epochs):
    D_real = 0.0
    D_G_1 = 0.0
    D_G_2 = 0.0
    d_loss = 0.0
    g_loss = 0.0
    for i ,(imgs, labels) in enumerate(dataloader):
        b_size = imgs.shape[0]
        
        # Adversarial ground truths
        real_label = torch.full((b_size, ), real, device = device)
        fake_label = torch.full((b_size,), fake, device = device)
        # generate fake images
        noise = torch.randn(b_size, args.latent_dim, 1, 1, device = device)
        noise_with_labels = torch.randint(0, args.n_classes, size = (b_size, 1, 1), device = device).long()  # embedding needs LongTensor
        fake_images = netG(noise, noise_with_labels)
        
        real_imgs = imgs.to(device)
        labels = labels.to(device)
        
        # -----------------
        #  Train Discriminator
        # -----------------
        netD.zero_grad()
        # Loss for real images
        output_real = netD(real_imgs, labels)
        d_real_loss = criterion(output_real, real_label)
        D_real = output_real.mean().item()
        # Loss for fake images
        output_fake = netD(fake_images.detach(), noise_with_labels)  # detach so D's update doesn't backprop into G
        d_fake_loss = criterion(output_fake, fake_label)
        D_G_1 = output_fake.mean().item()
        
        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss)/2
        d_loss.backward()
        optimizer_D.step()
        
        # -----------------
        #  Train Generator
        # -----------------
        netG.zero_grad()
        fake_images = netG(noise, noise_with_labels)
        output = netD(fake_images, noise_with_labels)
        g_loss = criterion(output, real_label)
        D_G_2 = output.mean().item()
        g_loss.backward()
        optimizer_G.step()
        
    # Output training stats
    print('[{}/{}]  Loss_G:{:.4f}  Loss_D:{:.4f}  D_to_True:{:.4f}  D_to_fake:{:.4f}  D_to_G:{:.4f}'.format(
                   epoch, args.epochs, g_loss, d_loss, D_real, D_G_1, D_G_2))
    writer.add_scalar('G_LOSS', g_loss, global_step = epoch)
    writer.add_scalar('D_LOSS', d_loss, global_step = epoch)
    if epoch % 5 == 0:
        with torch.no_grad():
            # save a grid of generated digits ranging from 0 to n_classes
            # sample noise
            z = torch.randn(100, args.latent_dim, 1, 1, device = device)
            # labels 0..9, repeated for each of the 10 rows
            labels = torch.tensor([num for _ in range(10) for num in range(10)])
            labels = torch.reshape(labels, (-1, 1, 1)).to(device).long()
            gen_images = netG(z, labels) # output --> [100, 1, 32, 32]
            save_image(gen_images.data, 'Picture/{}.png'.format(epoch), nrow = 10, normalize=True)

            img_list.append(torchvision.utils.make_grid(gen_images, normalize = True))

        # checkpointing: save the parameters only
        torch.save(netG.state_dict(), 'Model/netG_epoch_{}.pth'.format(epoch))
        torch.save(netD.state_dict(), 'Model/netD_epoch_{}.pth'.format(epoch))
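# After training, a saved generator can be reloaded to produce a chosen digit on
# demand. A minimal sketch, assuming the epoch-75 checkpoint exists under Model/:
# netG.load_state_dict(torch.load('Model/netG_epoch_75.pth', map_location=device))
# netG.eval()
# with torch.no_grad():
#     z = torch.randn(10, args.latent_dim, 1, 1, device = device)
#     sevens = torch.full((10,), 7, dtype=torch.long, device = device)  # ten copies of the digit 7
#     save_image(netG(z, sevens).data, 'Picture/sevens.png', nrow = 10, normalize=True)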
    
        
# show the network structures in tensorboard
noise = torch.randn(64, args.latent_dim, 1, 1, device = device)
noise_with_labels = torch.randint(0, args.n_classes, size = (64, 1, 1), device = device).long()
dataiter = iter(dataloader)
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device).long()
with writer as w:
    w.add_graph(netG, (noise, noise_with_labels))  # add_graph takes the model inputs as a tuple
    w.add_graph(netD, (images, labels))

# animation: load the grids saved every 5 epochs
img_list = []
for i in range(15):
    temp = io.imread('./Picture/{}.png'.format(i*5))
    img_list.append(temp)
# play the frames as an animation
fig = plt.figure(figsize=(8,8))
plt.axis("off")
ims = [[plt.imshow(i, animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

HTML(ani.to_jshtml())
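To keep a standalone copy of the animation, matplotlib's bundled PillowWriter can export it as a GIF (optional):

ani.save('cgan_training.gif', writer=animation.PillowWriter(fps=1))  # one frame per second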
        

IV. Results

Training has essentially converged by around epoch 75.
epoch = 5:
[Figure: generated digit grid at epoch 5.]
epoch = 25:
[Figure: generated digit grid at epoch 25.]
epoch = 75:
[Figure: generated digit grid at epoch 75.]
Part of the training log:
[Figure: console output of per-epoch losses.]

V. Problems Encountered and Solutions

1. The input to an nn.Linear() layer cannot have the shape (batch_size, C, H, W); the last dimension must match the layer's input feature count, so reshape first. For example:

model = nn.Sequential(
   nn.Linear(256, 128)
)
a = torch.randn(128, 3, 32, 32)    # feeding this 4-D tensor directly raises a size-mismatch error
a = torch.reshape(a, (-1, 256))    # flatten into rows of 256 features
b = model(a)
b.shape
Output: torch.Size([1536, 128])
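An equivalent pattern keeps the batch dimension intact by flattening inside the model; a small sketch using the standard nn.Flatten (note it flattens all 3*32*32 features per sample, unlike the (-1, 256) reshape above):

model = nn.Sequential(
    nn.Flatten(),                  # (128, 3, 32, 32) --> (128, 3072)
    nn.Linear(3 * 32 * 32, 128)
)
a = torch.randn(128, 3, 32, 32)
print(model(a).shape)              # torch.Size([128, 128])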

2. The input to an nn.Embedding() layer must be a LongTensor. If the data runs on the GPU it must be a torch.cuda.LongTensor; if it runs on the CPU, a torch.LongTensor. In short, the tensor type must match the device. For example:

---- Case 1: data running on the CPU ----
device = torch.device('cuda:0' if (torch.cuda.is_available()) else 'cpu')
model = nn.Sequential(
   nn.Embedding(10, 3)
)
a = torch.tensor([1,2,3])
output = model(a)
output.size()
Output: torch.Size([3, 3])

---- Case 2: data running on the GPU ----
device = torch.device('cuda:0' if (torch.cuda.is_available()) else 'cpu')
model = nn.Sequential(
   nn.Embedding(10, 3)
)
model.to(device)
a = torch.cuda.LongTensor(torch.tensor([1,2,3,4], device = device))
output = model(a)
output.size()
Output: torch.Size([4, 3])
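A device-agnostic variant sidesteps the torch.cuda.LongTensor constructor altogether; casting with .long() before .to(device) works on either device:

a = torch.tensor([1,2,3,4]).long().to(device)  # LongTensor on CPU, cuda.LongTensor on GPU
output = model(a)
output.size()
Output: torch.Size([4, 3])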

One pitfall: converting from an ndarray and then casting can raise an error, because np.array([1,2,3]) defaults to int32 on some platforms (e.g. Windows), so torch.from_numpy yields an IntTensor:

# TypeError: expected Long (got Int)
labels = np.array([1,2,3])
labels = torch.from_numpy(labels)
labels = labels.to(device)
labels = torch.cuda.LongTensor(labels)

The following does not raise, since torch.tensor([1,2,3]) is already int64:
labels = torch.tensor([1,2,3])
labels = labels.to(device)
labels = torch.cuda.LongTensor(labels)
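The simplest fix is an explicit cast during conversion, which avoids relying on numpy's platform-dependent default integer type:

labels = np.array([1,2,3])
labels = torch.from_numpy(labels).long().to(device)  # explicit int64, works on CPU and GPU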

3. Animation playback:

# img_list holds, for each frame, a list of Artists
img_list = []
for i in range(15):
    temp = io.imread('./Picture/{}.png'.format(i*5))
    img_list.append(temp)
# play the frames as an animation
fig = plt.figure(figsize=(8,8))
plt.axis("off")
ims = [[plt.imshow(i, animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

HTML(ani.to_jshtml())

4. Error: TypeError: __init__() takes 1 positional argument but 3 were given
This error means the network was never instantiated, or the class itself was called instead of an instance, so the arguments went to __init__ rather than forward().
The head of the network definition:

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()

Instantiating such a network normally takes no arguments, matching the parameterless __init__ (apart from self):
auto = AutoEncoder()
Calling the class directly as if it were the instance looks like this and fails:
out = AutoEncoder(input)
The correct call goes through the instance:
out = auto(input)
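A minimal self-contained reproduction (the AutoEncoder body here is a hypothetical one-layer filler, just to make the snippet runnable):

import torch
import torch.nn as nn

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.fc = nn.Linear(4, 4)   # hypothetical filler layer
    def forward(self, x):
        return self.fc(x)

x = torch.randn(2, 4)
auto = AutoEncoder()
out = auto(x)            # correct: call the instance, which dispatches to forward()
# out = AutoEncoder(x)   # TypeError: __init__() takes 1 positional argument but 2 were given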