ALI模型理论以及Python实现

https://openreview.net/forum?id=B1ElR4cgg

模型结构和明天要发的BiGAN模型一模一样,但是两篇论文的作者都是独立完成各自的工作的。而且从写作的风格来看,emmm,完全不一样。

ALI跟BiGAN的设计一模一样,但是就是没有加Latent regressor。虽然在ALI中也简要地谈到了这个Latent regressor。

并且根据ALI中的模型(G, E,D的架构)更容易实现,条理更加清晰,模型的结构设计实现也很容易。

ALI和BiGAN的对比

整体的设计上一模一样,这是共同点。并且两者都是独立设计的。

  1. ALI虽然提到Latent regressor但是,并没有使用。(只是说可以用来作为一个正则化,提高精度的额外的方法);BiGAN则放了较大的笔墨在这个regressor上。

  2. ALI结构更加清晰,并且各个模块训练时对应的损失也写得很清晰;BiGAN虽然在语言上大致描述了为什么这样设计,但是描述得不够直观清晰,而且GAN训练本来就存在大量的坑,稍微'合理'地修改某个小细节,就会导致训练不出结果。

  3. ALI对于E的解释上做得比较好(E可以理解为另外的一种G),这样看来都是来fool D的所以也是一种对抗,较为直观。并且ALI的数学分析部分和GAN承接的更加好,写得更加清晰。

b0a2954811b60d3c3dec7a0c41e998ea.png

Latent regressor的计算方式:使用L2范数,即 $\lVert z - E(G(z)) \rVert_2^2$(对应下文代码中的 `MSELoss(z, E(G_z))`)。

两者虽然都谈到了Latent regressor,但是ALI更侧重笔墨于模型的结构的设计(但是画图不行)。BiGAN虽然更侧重于Latent regressor,但是结构画图相当不错。可以说是非常喜剧了。

给个对比:

32922ec45396e2d8933df2fc033e11b0.png

两个结构说的是一回事,当时看到真的笑死。

c5d40640ba5e9e66dc6b156510d2777d.png

ALI虽然谈到了latent regressor,但是算法中并没有交代如何使用。

800a3a7ee9123b0e855c662c57263ff5.png

BiGAN虽然交代了使用,但是BiGAN没有给损失的具体写法,对于E的训练要自己设计。

可能是两者都或多或少有点问题,所以17年的ICLR就把两篇都录用了。

(或少:应该是ALI,或多多半是BiGAN)

后来就常用 ALI/BiGAN来表示这个模型。


恰饭


实验

实验相比于BiGAN没有使用latent regressor,但是效果居然也还行。

与论文中的实验做法一样,第一行是G(E(x)),第二行是x。

x来源是真实数据。通过E学习到x的隐式特征z,输入给G,让G生成。

99553413a77536ef11e2d7f767cedeb5.png

1a00d70f4520cf43f3db8c21f1a44da9.png

edf7730360c69e5ec35bd78725636fc8.png

18060262250841c74d4318118840d72d.png

main.py

import os
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from model import Generator, Discriminator, Encoder
import torchvision
import itertools
import matplotlib.pyplot as plt
import torchvision.utils as vutils
import numpy as np


def main():
    """Train the ALI generator, encoder and discriminator on MNIST.

    Every second epoch a grid of reconstructions ``G(E(x))`` followed by the
    corresponding inputs ``x`` (taken from the last batch of the epoch) is
    written to ``E_<epoch>_.png``.  After training, the three modules are
    pickled to ``G.pkl``, ``D.pkl`` and ``E.pkl`` in the working directory.
    """
    LR = 0.0002
    EPOCH = 100  # 50
    BATCH_SIZE = 100
    N_IDEAS = 128  # number of latent channels; z has shape (N, N_IDEAS, 1, 1)
    lam = 1  # weight of the latent regressor term when it is enabled
    USE_LATENT_REGRESSOR = False  # False: the term is only logged, not optimised
    TRAINED = False  # True: resume from the pickled modules
    mnist_root = '../Conditional-GAN/mnist/'

    # Fall back to the CPU when no GPU is present instead of crashing in .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Download only when the dataset directory is missing or empty.
    DOWNLOAD_MNIST = not os.path.exists(mnist_root) or not os.listdir(mnist_root)

    train_data = torchvision.datasets.MNIST(
        root=mnist_root,
        train=True,  # this is training data
        # Converts a PIL.Image or numpy.ndarray to a torch.FloatTensor of
        # shape (C x H x W) and normalizes it to the range [0.0, 1.0].
        transform=torchvision.transforms.ToTensor(),
        download=DOWNLOAD_MNIST,
    )
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    if device.type == 'cuda':
        torch.cuda.empty_cache()

    if TRAINED:
        # NOTE(review): these files hold whole pickled modules; recent PyTorch
        # releases default torch.load to weights_only=True and may need
        # weights_only=False here -- confirm against the installed version.
        G = torch.load('G.pkl', map_location=device)
        D = torch.load('D.pkl', map_location=device)
        E = torch.load('E.pkl', map_location=device)
    else:
        G = Generator(N_IDEAS)
        D = Discriminator()
        E = Encoder(input_size=1, out_size=N_IDEAS)
    G, D, E = G.to(device), D.to(device), E.to(device)

    optimizerG_E = torch.optim.Adam(itertools.chain(G.parameters(), E.parameters()), lr=LR)
    optimizerD = torch.optim.Adam(D.parameters(), lr=LR)

    # BCELoss averages the per-sample log terms and clamps log() internally,
    # so a saturated discriminator output cannot produce inf/nan.
    bce = nn.BCELoss()
    l_c = nn.MSELoss()

    n_batches = len(train_loader)
    for epoch in range(EPOCH):
        sum_D, sum_G_E, sum_latent = 0.0, 0.0, 0.0
        for step, (x, y) in enumerate(train_loader):
            x = x.to(device)
            z = torch.randn(x.shape[0], N_IDEAS, 1, 1, device=device)

            G_z = G(z)  # generated image for the sampled latent
            E_x = E(x)  # inferred latent for the real image

            # ---- discriminator step: (x, E(x)) -> 1, (G(z), z) -> 0 ----
            # detach() keeps this backward pass out of G and E.
            d_real = D(x, E_x.detach())
            d_fake = D(G_z.detach(), z)
            D_loss = bce(d_real, torch.ones_like(d_real)) + bce(d_fake, torch.zeros_like(d_fake))
            optimizerD.zero_grad()
            D_loss.backward()
            optimizerD.step()

            # ---- generator/encoder step: swapped labels ----
            # D's weights were just updated in place, so D is run again to get
            # a graph that is consistent with its current weights.
            d_real = D(x, E_x)
            d_fake = D(G_z, z)
            G_E_loss = bce(d_real, torch.zeros_like(d_real)) + bce(d_fake, torch.ones_like(d_fake))
            Latent_regress = l_c(z, E(G_z))
            if USE_LATENT_REGRESSOR:
                G_E_loss = G_E_loss + lam * Latent_regress
            optimizerG_E.zero_grad()
            G_E_loss.backward()
            optimizerG_E.step()

            sum_D += D_loss.item()
            sum_G_E += G_E_loss.item()
            sum_latent += Latent_regress.item()

        print(
            'epoch %d avg of loss: D: %.6f, G_E: %.6f, latent: %.6f'
            % (epoch, sum_D / n_batches, sum_G_E / n_batches, sum_latent / n_batches)
        )

        if epoch % 2 == 0:
            # Reconstruct the last batch of the epoch; no graph is needed here.
            with torch.no_grad():
                G_imgs = G(E(x)).cpu()
            grid = vutils.make_grid(torch.cat([G_imgs, x.cpu()]), nrow=10, padding=0,
                                    normalize=True, scale_each=True)
            fig = plt.figure(figsize=(10, 10))
            plt.axis("off")
            plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))
            # Name the file after the epoch: the batch index is the same at the
            # end of every epoch and would overwrite one file again and again.
            plt.savefig('E_%d_.png' % epoch)
            plt.show()
            plt.close(fig)  # release the figure so they do not pile up

    # Whole-module pickles: this is the format judge.py loads.
    torch.save(G, 'G.pkl')
    torch.save(D, 'D.pkl')
    torch.save(E, 'E.pkl')


if __name__ == '__main__':
    main()

model.py

import osimport torchimport torch.nn as nnimport torch.utils.data as Dataimport torchvisionfrom torch.utils.data import DataLoaderclass Generator(nn.Module):    def __init__(self, input_size):        super(Generator, self).__init__()        strides = [1, 2, 2, 2]        padding = [0, 1, 1, 1]        channels = [input_size,                    256, 128, 64, 32]  # 1表示一维        kernels = [4, 3, 4, 4]        model = []        for i, stride in enumerate(strides):            model.append(                nn.ConvTranspose2d(                    in_channels=channels[i],                    out_channels=channels[i + 1],                    stride=stride,                    kernel_size=kernels[i],                    padding=padding[i]                )            )            model.append(                nn.BatchNorm2d(channels[i + 1])            )            model.append(                nn.LeakyReLU(.1)            )        self.Conv_T = nn.Sequential(*model)        self.Conv = nn.Sequential(            nn.Conv2d(kernel_size=1, stride=1, in_channels=channels[-1], out_channels=channels[-1]),            nn.BatchNorm2d(channels[-1]),            nn.LeakyReLU(.1),            nn.Conv2d(kernel_size=1, stride=1, in_channels=channels[-1], out_channels=1),            nn.Sigmoid()        )    def forward(self, x):        x = self.Conv_T(x)        x = self.Conv(x)        return xclass Encoder(nn.Module):    def __init__(self, input_size=1, out_size=128):        super(Encoder, self).__init__()        strides = [2, 2, 2, 1, 1, 1]        padding = [1, 1, 1, 0, 0, 0]        channels = [input_size, 32, 64, 128, 256, out_size, out_size]  # 1表示一维        kernels = [4, 4, 4, 3, 1, 1]        model = []        for i, stride in enumerate(strides):            model.append(                nn.Conv2d(                    in_channels=channels[i],                    out_channels=channels[i + 1],                    stride=stride,                    kernel_size=kernels[i],                    padding=padding[i]   
             )            )            if i != len(strides) - 1:                model.append(                    nn.BatchNorm2d(channels[i + 1])                )                model.append(                    nn.ReLU()                )        self.main = nn.Sequential(*model)    def forward(self, x):        x = self.main(x)        return xclass Discriminator(nn.Module):    def __init__(self, x_in=1, z_in=128):        super(Discriminator, self).__init__()        self.D_x = nn.Sequential(            nn.Conv2d(in_channels=x_in, out_channels=32, kernel_size=4, stride=2),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),            nn.BatchNorm2d(64),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2),            nn.BatchNorm2d(128),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),        )        self.D_z = nn.Sequential(            nn.Conv2d(in_channels=z_in, out_channels=256, kernel_size=1, stride=1),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),        )        self.D_x_z = nn.Sequential(            nn.Conv2d(in_channels=256 + 128, out_channels=512, kernel_size=1, stride=1),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1),            nn.Dropout2d(.2),            nn.LeakyReLU(.1),            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1),            nn.Dropout2d(.2),            nn.Sigmoid(),        )    def forward(self, x, z):        x = self.D_x(x)        z = self.D_z(z)        cat_x_z = torch.cat([x, z], dim=1)        return self.D_x_z(cat_x_z)if __name__ == '__main__':    N_IDEAS = 128    G = Generator(N_IDEAS, )    
rand_noise = torch.randn((10, N_IDEAS, 1, 1))    print(G(rand_noise).shape)    E = Encoder(input_size=1, out_size=N_IDEAS)    print(E(G(rand_noise)).shape)    D = Discriminator()    print(D(G(rand_noise), rand_noise).shape)

judge.py

import numpy as np
import torch
import matplotlib.pyplot as plt
from model import Generator, Discriminator
import torchvision.utils as vutils
import os
import torchvision
from torch.utils.data import Dataset, DataLoader


def main():
    """Plot MNIST reconstructions ``G(E(x))`` next to their inputs ``x``.

    Loads the pickled generator and encoder from ``G.pkl`` / ``E.pkl`` and,
    ``TIME`` times, draws ``N_BATCHES`` random batches, then saves a grid in
    which each row of reconstructions is followed by the row of the inputs
    it was computed from (``E_<t>.png``).
    """
    BATCH_SIZE = 10  # equals make_grid's nrow, so one batch fills one row
    N_BATCHES = 6  # batches per figure -> 2 * N_BATCHES rows
    TIME = 10  # number of figures to produce
    mnist_root = '../Conditional-GAN/mnist/'

    # Fall back to the CPU when no GPU is present instead of crashing in .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): these files hold whole pickled modules; recent PyTorch
    # releases default torch.load to weights_only=True and may need
    # weights_only=False here -- confirm against the installed version.
    G = torch.load("G.pkl", map_location=device).to(device)
    E = torch.load('E.pkl', map_location=device).to(device)
    # NOTE(review): the modules are used in whatever train/eval mode they
    # were pickled in; switching to eval() would change how their BatchNorm
    # layers normalise, so it is left for the author to decide.

    # Download only when the dataset directory is missing or empty.
    DOWNLOAD_MNIST = not os.path.exists(mnist_root) or not os.listdir(mnist_root)
    train_data = torchvision.datasets.MNIST(
        root=mnist_root,
        train=True,  # this is training data
        # Converts a PIL.Image or numpy.ndarray to a torch.FloatTensor of
        # shape (C x H x W) and normalizes it to the range [0.0, 1.0].
        transform=torchvision.transforms.ToTensor(),
        download=DOWNLOAD_MNIST,
    )
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    for t in range(TIME):
        rows = []
        # zip() with a range stops after N_BATCHES batches.
        for _, (x, y) in zip(range(N_BATCHES), train_loader):
            x = x.to(device)
            with torch.no_grad():  # inference only, no graph needed
                G_imgs = G(E(x)).cpu()
            rows.append(torch.cat([G_imgs, x.cpu()]))

        grid = vutils.make_grid(torch.cat(rows), nrow=10, padding=0, normalize=True,
                                scale_each=True)
        fig = plt.figure(figsize=(10, 10))
        plt.axis("off")
        plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))
        plt.savefig('E_%d.png' % t)
        plt.show()
        plt.close(fig)  # release the figure so they do not pile up


if __name__ == '__main__':
    main()

882427abd25326e081a98344d54a7cbd.png

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值