数据集
MNIST
数据预处理
超参设置
# Hyperparameters
batch_size = 100
latent_size = 100  # dimensionality of the generator's input noise vector
num_epochs = 100
# Download the MNIST training set; ToTensor() yields float images in [0, 1]
mnist_dataset = MNIST(root='data', train=True, download=True, transform=ToTensor())
输出数据集相关信息
# Print basic dataset statistics and show one sample
print(f"The dataset has {len(mnist_dataset)} items.")
sample_image, sample_label = mnist_dataset[0]
print(f"The image size is {sample_image.shape} and the number inside it should be {sample_label}\n\n")
# Display channel 0 of the first sample as a grayscale image
plt.imshow(sample_image[0], cmap="gray")
# NOTE: .data holds the raw uint8 pixels (0-255), bypassing the ToTensor transform
print(
f"The min value of dataset is: {mnist_dataset.data.min()}\n"
f"The max value of dataset is: {mnist_dataset.data.max()}\n"
f"The mean value of dataset is: {mnist_dataset.data.float().mean()}\n"
f"The STD of dataset is: {mnist_dataset.data.float().std()}"
)
对数据集进行归一化处理
# Normalization: map raw 0-255 pixels to [-1, 1] (matches the generator's tanh output range)
data = mnist_dataset.data.float() / 255.0  # scale to [0, 1]
data_scaled = data.mul(2).sub(1)  # [0, 1] * 2 - 1  ->  [-1, 1]
labels = mnist_dataset.targets
# Pack the scaled images (with an explicit channel dimension) and labels
# into a TensorDataset for the DataLoader below
dataset = torch.utils.data.TensorDataset(data_scaled.view(-1, 1, 28, 28), labels)
# FIX: use the batch_size hyperparameter instead of a hard-coded 100,
# so changing batch_size at the top actually takes effect here
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
输出归一化后数据集相关信息
# Verify the scaled dataset's statistics (range should now be [-1, 1])
print(
f"The min value of dataset is: {data_scaled.min()}\n"
f"The max value of dataset is: {data_scaled.max()}\n"
f"The mean value of dataset is: {data_scaled.float().mean()}\n"
f"The STD of dataset is: {data_scaled.float().std()}"
)
# Show a 6x6 patch from the center of the first scaled image
print(f"A snapshot of a scaled sample: \n\n{data_scaled[0][9:15, 9:15]}")
基本组成结构
Discriminator
class Discriminator(nn.Module):
    """CNN binary classifier: maps a (N, 1, 28, 28) image batch to a
    probability in (0, 1) that each image is real.

    Two stride-2 convolutions downsample 28x28 -> 14x14 -> 7x7, each
    followed by LeakyReLU and Dropout, then a linear head with sigmoid.
    """

    def __init__(self, in_channels = 1, features = [64, 64]) -> None:
        # in_channels: channels of the input image (1 = grayscale MNIST)
        # features: output channel counts of the two conv layers
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, features[0], kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(features[0], features[1], kernel_size=3, stride=2, padding=1)
        # LeakyReLU keeps a small gradient (slope 0.2) for negative inputs
        self.leaky_relu = nn.LeakyReLU(0.2)
        # Randomly zero 40% of activations to reduce overfitting
        self.dropout = nn.Dropout(0.4)
        # After two stride-2 convs the 28x28 map is 7x7
        self.fc = nn.Linear(features[1]*7*7, 1)

    def forward(self, x):
        # Each stage: conv -> LeakyReLU -> Dropout
        for conv in (self.conv1, self.conv2):
            x = self.dropout(self.leaky_relu(conv(x)))
        # Flatten to [batch_size, channels * height * width]
        flat = x.view(x.shape[0], -1)
        return torch.sigmoid(self.fc(flat))
一文搞懂激活函数(Sigmoid/ReLU/LeakyReLU/PReLU/ELU) - 知乎 (zhihu.com)
Generator
class Generator(nn.Module):
    """DCGAN-style generator: maps a latent vector of size `latent_dim`
    to a (N, 1, 28, 28) image with values in (-1, 1) (tanh output).

    Pipeline: linear projection to a features[0] x 7 x 7 feature map,
    then two stride-2 transposed convolutions upsampling 7 -> 14 -> 28.
    """

    def __init__(self, latent_dim, features = [128, 128]) -> None:
        # latent_dim: size of the input noise vector
        # features: channel counts of the projected map and first deconv output
        super(Generator, self).__init__()
        self.fc = nn.Linear(latent_dim, features[0]*7*7)  # 7x7 spatial seed
        self.leaky_relu = nn.LeakyReLU(0.2)
        self.conv1 = nn.ConvTranspose2d(features[0], features[1], kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.ConvTranspose2d(features[1], 1, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        x = self.leaky_relu(self.fc(x))
        # BUG FIX: infer the channel count with -1 instead of the hard-coded
        # 128, so non-default `features` values work; the channel dimension
        # resolves to features[0] because fc outputs features[0]*7*7 values.
        x = x.view(x.shape[0], -1, 7, 7)
        x = self.leaky_relu(self.conv1(x))
        return torch.tanh(self.conv2(x))
定义损失与参数优化方法
# Instantiate both networks and move them to the compute device
D = Discriminator().to(device)
G = Generator(latent_size).to(device)
criterion = nn.BCELoss()  # binary cross-entropy, matches D's sigmoid output
# Separate Adam optimizers so D and G can be stepped independently
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)
分类问题的损失函数: 二元交叉熵(nn.BCELoss)和交叉熵(nn.CrossEntropyLoss) - 知乎 (zhihu.com)
模型训练
梯度置零
def reset_grad():
    """Zero the accumulated gradients of both the D and G optimizers."""
    for optimizer in (d_optimizer, g_optimizer):
        optimizer.zero_grad()
Discriminator 训练
def train_discriminator(images):
    """Run one discriminator update on a batch of real `images`.

    Returns (d_loss, real_score, fake_score), where the scores are D's
    outputs on the real and generated batches respectively.
    """
    # Reset gradients so the previous batch does not leak into this one
    reset_grad()
    # Labels: 1 for real images, 0 for fake images
    real_labels = torch.ones(batch_size, 1).to(device)
    # BUG FIX: fake labels must be ZEROS. The original used ones, which made
    # both BCE terms push D toward predicting "real" everywhere, so D never
    # learned to reject generated images.
    fake_labels = torch.zeros(batch_size, 1).to(device)
    # Loss and score on the real batch
    outputs = D(images)
    d_loss_real = criterion(outputs, real_labels)
    real_score = outputs
    # Generate fakes; detach so this step does not build gradients for G
    z = torch.randn(batch_size, latent_size).to(device)
    fake_images = G(z).detach()
    outputs = D(fake_images)
    d_loss_fake = criterion(outputs, fake_labels)
    fake_score = outputs
    # Combine both losses, backprop, and update D's parameters only
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    d_optimizer.step()
    return d_loss, real_score, fake_score
Generator 训练
def train_generator():
    """Run one generator update.

    G is trained to make D classify its output as real (label 1).
    Returns (g_loss, fake_images).
    """
    # Clear gradients left over from the previous step
    reset_grad()
    # Sample noise and generate a batch of fake images
    noise = torch.randn(batch_size, latent_size).to(device)
    fake_images = G(noise)
    # Target label 1: G succeeds when D is fooled into saying "real"
    target = torch.ones(batch_size, 1).to(device)
    g_loss = criterion(D(fake_images), target)
    # Backprop through D into G, then update only G's parameters
    g_loss.backward()
    g_optimizer.step()
    return g_loss, fake_images
生成过程
保存real images
# Create the output directory for sample images if it does not exist yet
sample_dir = 'samples'
# Idiomatic replacement for the exists()+makedirs() pair (also race-free)
os.makedirs(sample_dir, exist_ok=True)
# Grab a single batch of real images as a reference grid
images, _ = next(iter(data_loader))
# Ensure the tensor has shape [batch_size, 1, 28, 28]
images = images.reshape(images.size(0), 1, 28,28)
# Save the batch as a 10-per-row grid named 'real_images.png'
save_image(images, os.path.join(sample_dir, 'real_images.png'), nrow=10)
保存fake images
# A fixed batch of noise vectors, reused for every snapshot so that the
# saved samples across epochs are directly comparable
sample_vectors = torch.randn(batch_size, latent_size).to(device)

def save_fake_images(index):
    """Generate images from `sample_vectors` with G and save them as a
    grid named 'fake_images-XXXX.png' (XXXX = zero-padded `index`)."""
    # IMPROVEMENT: sampling needs no gradients — no_grad() avoids building
    # an autograd graph, saving memory and compute
    with torch.no_grad():
        fake_images = G(sample_vectors)
    # Reshape to [batch_size, 1, 28, 28] for the image grid
    fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28)
    fake_fname = 'fake_images-{0:0=4d}.png'.format(index)
    print('Saving', fake_fname)
    save_image(fake_images, os.path.join(sample_dir, fake_fname), nrow=10)

# Save (and display) the first batch of fakes from the untrained generator
save_fake_images(0)
Image(os.path.join(sample_dir, 'fake_images-0000.png'))
训练
total_step = len(data_loader)
# Running histories for later plotting (names kept as-is, since later cells
# may reference them, despite the 'socres' typo)
d_losses, g_losses, real_socres, fake_scores = [], [], [], []
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(data_loader):
        # images: [batch_size, channels, height, width]; labels are unused
        images = images.to(device)
        # One discriminator step; returns loss and D's scores on real/fake
        d_loss, real_socre, fake_score = train_discriminator(images)
        # BUG FIX: the original wrote `train_generator` without parentheses,
        # assigning the function object itself, which raises a TypeError on
        # tuple unpacking. G is trained twice per D step to keep the two
        # networks roughly balanced.
        g_loss, fake_images = train_generator()
        g_loss, fake_images = train_generator()
        # Log losses and scores every 200 steps
        if (i+1) % 200 == 0:
            d_losses.append(d_loss.item())
            g_losses.append(g_loss.item())
            real_socres.append(real_socre.mean().item())
            fake_scores.append(fake_score.mean().item())
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
                  .format(epoch, num_epochs, i+1, total_step, d_loss.item(), g_loss.item(),
                          real_socre.mean().item(), fake_score.mean().item()))
    # Save a grid of generated samples at the end of each epoch
    save_fake_images(epoch+1)
实验结果
视频结果
将每一个生成的图片作为帧,制作成视频的形式进行呈现
gans_training
过程以及fake images和real images对比
生成过程截图
最终生成结果
real image