用 GAN 生成简单的数字图片

1.之前只能做一些图像预测,我有个大胆的想法:既然神经网络的正向传播可以预测图片的类别,那么反过来,如果只训练一个类别,就可以用网络来生成这一类的图片——专业术语叫做 GAN(生成对抗网络)
在这里插入图片描述
2.训练代码

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as dset
import matplotlib.pyplot as plt
import os

# 设置环境变量
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# 定义生成器模型
class Generator(nn.Module):
    """Fully-connected generator: latent noise vector -> flat 784-pixel image.

    The tanh output keeps pixels in [-1, 1], matching the Normalize((0.5,), (0.5,))
    preprocessing applied to the real MNIST images.
    """

    def __init__(self, input_dim=100, output_dim=784):
        super(Generator, self).__init__()
        # Widening MLP: input_dim -> 256 -> 512 -> 1024 -> output_dim.
        # Attribute names fc1..fc4 are part of the state_dict layout; do not rename.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 1024)
        self.fc4 = nn.Linear(1024, output_dim)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self, x):
        hidden = x
        # ReLU after each hidden layer, tanh on the output layer.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.tanh(self.fc4(hidden))

# 定义判别器模型
class Discriminator(nn.Module):
    """Fully-connected discriminator: flat 784-pixel image -> real/fake probability."""

    def __init__(self, input_dim=784, output_dim=1):
        super(Discriminator, self).__init__()
        # Narrowing MLP: input_dim -> 1024 -> 512 -> 256 -> output_dim.
        # Attribute names fc1..fc4 are part of the state_dict layout; do not rename.
        self.fc1 = nn.Linear(input_dim, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, output_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        features = x
        for layer in (self.fc1, self.fc2, self.fc3):
            features = self.relu(layer(features))
        # Sigmoid squashes the final logit into a probability in (0, 1),
        # as required by the BCELoss used during training.
        return self.sigmoid(self.fc4(features))

# Load the MNIST handwritten-digit dataset. ToTensor yields [0, 1] pixels;
# Normalize((0.5,), (0.5,)) then maps them to [-1, 1], matching the range of
# the generator's tanh output so the discriminator sees comparable inputs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
dataroot = "path_to_your_mnist_dataset"  # replace with your MNIST path (download=True fetches it if absent)
dataset = dset.MNIST(root=dataroot, train=True, transform=transform, download=True)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True)

# Instantiate the models: generator maps 100-d noise to a 784-pixel image;
# discriminator maps a 784-pixel image to a real/fake probability.
input_dim = 100
output_dim = 784
generator = Generator(input_dim, output_dim)
discriminator = Discriminator(output_dim)

# Optimizers and loss. beta1 = 0.5 follows the DCGAN recommendation;
# BCELoss pairs with the discriminator's sigmoid output.
lr = 0.0002
beta1 = 0.5
optimizer_g = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, 0.999))
criterion = nn.BCELoss()

# Train the GAN: one discriminator step then one generator step per mini-batch.
num_epochs = 50
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device:", device)
generator.to(device)
discriminator.to(device)
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        real_images, _ = data
        real_images = real_images.to(device)
        batch_size = real_images.size(0)  # actual batch size (final batch may be smaller than 128)

        # --- Discriminator step: push real images toward label 1 and
        # freshly generated images toward label 0. ---
        optimizer_d.zero_grad()
        real_labels = torch.full((batch_size, 1), 1.0, device=device)
        fake_labels = torch.full((batch_size, 1), 0.0, device=device)
        noise = torch.randn(batch_size, input_dim, device=device)
        fake_images = generator(noise)
        real_outputs = discriminator(real_images.view(batch_size, -1))
        # detach() stops gradients from this loss flowing into the generator.
        fake_outputs = discriminator(fake_images.detach())
        d_loss_real = criterion(real_outputs, real_labels)
        d_loss_fake = criterion(fake_outputs, fake_labels)
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizer_d.step()

        # --- Generator step: sample new noise and train the generator to
        # make the (just-updated) discriminator output label 1. ---
        optimizer_g.zero_grad()
        noise = torch.randn(batch_size, input_dim, device=device)
        fake_images = generator(noise)
        fake_outputs = discriminator(fake_images)
        g_loss = criterion(fake_outputs, real_labels)
        g_loss.backward()
        optimizer_g.step()

        # Progress report every 100 mini-batches.
        if i % 100 == 0:
            print("[Epoch %d/%d] [Batch %d/%d] [D loss: %.4f] [G loss: %.4f]"
                  % (epoch, num_epochs, i, len(dataloader), d_loss.item(), g_loss.item()))

    # Every 10 epochs: save an 8x8 grid of generated samples (epochs 0, 10, ..., 40)
    # and a checkpoint of the generator weights that the test script loads later.
    if epoch % 10 == 0:
        with torch.no_grad():
            noise = torch.randn(64, input_dim, device=device)
            fake_images = generator(noise).view(64, 1, 28, 28).cpu().numpy()
            fig, axes = plt.subplots(nrows=8, ncols=8, figsize=(12, 12), sharex=True, sharey=True)
            for i, ax in enumerate(axes.flatten()):
                ax.imshow(fake_images[i][0], cmap='gray')
                ax.axis('off')
            plt.subplots_adjust(wspace=0.05, hspace=0.05)
            plt.savefig("epoch_%d.png" % epoch)
            plt.close()
        torch.save(generator.state_dict(), "generator_epoch_%d.pth" % epoch)

3.测试模型的代码

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.utils import save_image

# Generator model — must mirror the architecture AND activations used at
# training time, because it loads weights produced by the training script.
class Generator(nn.Module):
    """Fully-connected generator: latent noise vector -> flat 28x28 image in [-1, 1]."""

    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        # Same layer layout as the training-time Generator, so the saved
        # state_dict (fc1..fc4) loads without remapping.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 1024)
        self.fc4 = nn.Linear(1024, output_dim)

    def forward(self, x):
        # BUG FIX: the original used F.leaky_relu(..., 0.2) here, but the
        # checkpoint (generator_epoch_*.pth) was trained with plain ReLU
        # hidden activations. load_state_dict succeeds either way (shapes
        # match), yet with LeakyReLU the restored network computes a
        # different function than the one that was trained, degrading the
        # generated digits. Use ReLU to match the training-time definition.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.tanh(self.fc4(x))
        return x

# Rebuild the generator with the training-time dimensions
# (100-d latent noise in, 784 = 28*28 pixels out).
generator = Generator(input_dim=100, output_dim=784)

# Restore the trained weights, forcing all tensors onto the CPU.
state_dict = torch.load("generator_epoch_40.pth", map_location=torch.device('cpu'))
generator.load_state_dict(state_dict)

# Sample one latent vector and decode it into a single-channel 28x28 image.
latent = torch.randn(1, 100)
fake_image = generator(latent).view(1, 1, 28, 28)

# Write the sample to disk, keeping pixels in the network's own output range.
save_image(fake_image, "generated_image.png", normalize=False)

#测试结果,由于我的训练集是数字的,所以会生成各种各样的数字,下面明显的是1
在这里插入图片描述
#应该也是1
在这里插入图片描述

#再次运行,我也看不出来,不过只要我训练只有一个种类的问题就可以生成这个种类的图像
在这里插入图片描述
#搞定黑白图之后,彩色图应该也不远了。我需要改进的是把对抗网络的代码改为只训练一个种类的图形。不过这种生成具有随机性:虽然通过训练我们学到了所有图像的共同规律,但想得到比较“正常”的图片还是挺难的——就像上面这张,人都不一定看得出它是什么东西(在没有颜色的情况下)。总结:精度不够,而且随机性太强。现在常见的 AI 图片生成工具普遍有这个缺点(生成的物体可能会扭曲,挺阴间的),而且生成速度慢。要说谁最受益,那一定是老黄(英伟达)哈哈哈
//比如下面这个图片生成视频的网站
https://app.runwayml.com/login

#每一帧看起来都没有问题,就是连起来变成视频不自然,如果有改进方法的话那可能需要引入重力/加速度/光处理 等等物理公式,来让图片更自然…
在这里插入图片描述
在这里插入图片描述

  • 6
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的 ACGAN 生成对抗网络的 PyTorch 代码,用于生成指定数字的手写数字图片:

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import numpy as np

# 定义生成器网络
class Generator(nn.Module):
    def __init__(self, z_dim=100, num_classes=10):
        super(Generator, self).__init__()
        self.z_dim = z_dim
        self.num_classes = num_classes
        self.fc1 = nn.Linear(z_dim + num_classes, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 1024)
        self.fc4 = nn.Linear(1024, 28*28)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self, z, labels):
        inputs = torch.cat([z, labels], dim=1)
        x = self.relu(self.fc1(inputs))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.tanh(self.fc4(x))
        return x.view(-1, 1, 28, 28)

# 定义判别器网络
class Discriminator(nn.Module):
    def __init__(self, num_classes=10):
        super(Discriminator, self).__init__()
        self.num_classes = num_classes
        self.conv1 = nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
        self.fc1 = nn.Linear(128*7*7 + num_classes, 1024)
        self.fc2 = nn.Linear(1024, 1)
        self.leaky_relu = nn.LeakyReLU(0.2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, labels):
        x = self.leaky_relu(self.conv1(x))
        x = self.leaky_relu(self.conv2(x))
        x = x.view(-1, 128*7*7)
        inputs = torch.cat([x, labels], dim=1)
        x = self.leaky_relu(self.fc1(inputs))
        x = self.sigmoid(self.fc2(x))
        return x

# 定义训练函数
def train(generator, discriminator, dataloader, num_epochs=200, z_dim=100,
          num_classes=10, lr=0.0002, beta1=0.5, beta2=0.999):
    criterion = nn.BCELoss()
    g_optimizer = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, beta2))
    d_optimizer = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, beta2))
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(dataloader):
            batch_size = images.size(0)
            images = Variable(images)
            labels = Variable(labels)

            # 训练判别器
            d_optimizer.zero_grad()
            real_labels = Variable(torch.ones(batch_size))
            fake_labels = Variable(torch.zeros(batch_size))

            # 训练判别器使用真实图片
            real_outputs = discriminator(images, labels)
            d_loss_real = criterion(real_outputs, real_labels)

            # 训练判别器使用生成器生成的假图片
            z = Variable(torch.randn(batch_size, z_dim))
            fake_labels = Variable(torch.LongTensor(np.random.randint(0, num_classes, batch_size)))
            fake_images = generator(z, fake_labels)
            fake_outputs = discriminator(fake_images, fake_labels)
            d_loss_fake = criterion(fake_outputs, fake_labels)
            d_loss = d_loss_real + d_loss_fake
            d_loss.backward()
            d_optimizer.step()

            # 训练生成器
            g_optimizer.zero_grad()
            z = Variable(torch.randn(batch_size, z_dim))
            fake_labels = Variable(torch.LongTensor(np.random.randint(0, num_classes, batch_size)))
            fake_images = generator(z, fake_labels)
            fake_outputs = discriminator(fake_images, fake_labels)
            g_loss = criterion(fake_outputs, real_labels)
            g_loss.backward()
            g_optimizer.step()

            if (i+1) % 100 == 0:
                print('Epoch [%d/%d], Step [%d/%d], d_loss: %.4f, g_loss: %.4f'
                      % (epoch+1, num_epochs, i+1, len(dataloader), d_loss.data[0], g_loss.data[0]))

# 加载 MNIST 数据集
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# 定义模型和训练
generator = Generator()
discriminator = Discriminator()
train(generator, discriminator, train_dataloader)
```

(注意:这段示例把整数类别标签直接 `torch.cat` 进网络输入,并在 BCELoss 里把类别标签当作真假目标使用;实际运行前需要先把标签做 one-hot 编码,并用全 0 向量作为假样本的判别目标。)

这个代码可以生成指定数字的手写数字图片,你可以在 `train` 函数中指定要生成的数字,例如:

```python
z = Variable(torch.randn(10, 100))
labels = Variable(torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
images = generator(z, labels)
```

这将生成 0 到 9 的 10 个手写数字图片

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值