GAN-FID计算

最新推荐文章于 2024-05-07 11:39:10 发布

翰墨大人

最新推荐文章于 2024-05-07 11:39:10 发布

阅读量2k

点赞数 3

分类专栏：paper代码　文章标签：生成对抗网络、人工智能、神经网络

本文链接：https://blog.csdn.net/qq_43733107/article/details/132077680

版权

paper代码专栏收录该内容

37 篇文章

订阅专栏

import os
import numpy as np
import torch
import time
from scipy import linalg # For numpy FID
from pathlib import Path
from PIL import Image
import models.models as models
from utils.fid_folder.inception import InceptionV3
import matplotlib.pyplot as plt

# --------------------------------------------------------------------------#
# This code is an adapted version of https://github.com/mseitzer/pytorch-fid
# --------------------------------------------------------------------------#

class fid_pytorch():
    """Track the Frechet Inception Distance (FID) of a generator during training.

    Inception features of the validation images are computed once when the
    object is built. `update` then scores the current generator against those
    cached statistics, appends the score to a log on disk, redraws the FID
    curve and reports whether the score is the best one seen so far.
    """

    def __init__(self, opt, dataloader_val):
        """Create the Inception feature extractor and cache real-image statistics.

        Args:
            opt: Options object. This class reads `gpu_ids`,
                `checkpoints_dir`, `name` and `no_EMA` from it.
            dataloader_val: Iterable of validation batches; each batch is
                indexed with the key "image" when real statistics are built.
        """
        self.opt = opt
        self.dims = 2048
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[self.dims]
        self.model_inc = InceptionV3([block_idx])
        # The string "-1" is the CPU-only marker used throughout this class.
        if opt.gpu_ids != "-1":
            self.model_inc.cuda()
        self.val_dataloader = dataloader_val
        self.m1, self.s1 = self.compute_statistics_of_val_path(dataloader_val)
        self.best_fid = 99999999
        self.path_to_save = os.path.join(self.opt.checkpoints_dir, self.opt.name, "FID")
        Path(self.path_to_save).mkdir(parents=True, exist_ok=True)

    def compute_statistics_of_val_path(self, dataloader_val):
        """Return the mean and covariance of Inception features of real images.

        Args:
            dataloader_val: Loader to take the real images from. When it is
                None the loader stored on the instance is used instead.

        Returns:
            Tuple `(mu, sigma)` of torch tensors.
        """
        print("--- Now computing Inception activations for real set ---")
        pool = self.accumulate_inception_activations(dataloader_val)
        # Take the mean before calling torch_cov so the result does not depend
        # on whether torch_cov centres its input in place.
        mu = torch.mean(pool, 0)
        sigma = torch_cov(pool, rowvar=False)
        print("--- Finished FID stats for real set ---")
        return mu, sigma

    def accumulate_inception_activations(self, dataloader=None):
        """Run the Inception network over real images and stack the features.

        Args:
            dataloader: Optional loader to iterate. Defaults to the validation
                loader given to the constructor.

        Returns:
            A tensor with one row of features per image.
        """
        loader = self.val_dataloader if dataloader is None else dataloader
        features = []
        self.model_inc.eval()
        with torch.no_grad():
            for batch in loader:
                image = batch["image"]
                if self.opt.gpu_ids != "-1":
                    image = image.cuda()
                # Shift and halve the pixel values; this maps [-1, 1] onto
                # [0, 1] (assumes inputs are normalised to [-1, 1] - TODO confirm).
                image = (image + 1) / 2
                # First requested block output, taken at spatial index (0, 0);
                # assumes that output is already pooled to 1x1 - TODO confirm.
                features.append(self.model_inc(image.float())[0][:, :, 0, 0])
        return torch.cat(features, 0)

    def compute_fid_with_valid_path(self, netG, netEMA):
        """Compute the FID between cached real statistics and generated images.

        Args:
            netG: Generator; used for synthesis when `opt.no_EMA` is set.
            netEMA: EMA copy of the generator; used otherwise.

        Returns:
            The Frechet distance as a NumPy float.
        """
        use_ema = not self.opt.no_EMA
        features = []
        self.model_inc.eval()
        netG.eval()
        if use_ema:
            netEMA.eval()
        try:
            generator = netEMA if use_ema else netG
            with torch.no_grad():
                for batch in self.val_dataloader:
                    _, label = models.preprocess_input(self.opt, batch)
                    generated = generator(label)
                    generated = (generated + 1) / 2
                    features.append(self.model_inc(generated.float())[0][:, :, 0, 0])
                pool = torch.cat(features, 0)
                # Mean first, covariance second (see compute_statistics_of_val_path).
                mu = torch.mean(pool, 0)
                sigma = torch_cov(pool, rowvar=False)
                answer = self.numpy_calculate_frechet_distance(self.m1, self.s1, mu, sigma)
        finally:
            # Put the networks back into training mode even if evaluation
            # failed, so an error here cannot leave training in eval mode.
            netG.train()
            if use_ema:
                netEMA.train()
        return answer

    def numpy_calculate_frechet_distance(self, mu1, sigma1, mu2, sigma2, eps=1e-6):
        """Frechet distance between two Gaussians, evaluated with NumPy/SciPy.

        Adapted from https://github.com/bioinf-jku/TTUR (stable version by
        Dougal J. Sutherland). For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2):

            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2))

        Args:
            mu1: Torch tensor, mean of the features of the first image set.
            sigma1: Torch tensor, covariance of the features of the first set.
            mu2: Torch tensor, mean of the features of the second image set.
            sigma2: Torch tensor, covariance of the features of the second set.
            eps: Value added to both covariance diagonals when the first
                matrix square root is not finite.

        Returns:
            The Frechet distance as a NumPy float64.

        Raises:
            AssertionError: If the means or the covariances differ in shape.
        """
        # Move to NumPy and widen to float64: the matrix square root of a
        # large covariance product is numerically fragile in single precision.
        mu1, sigma1, mu2, sigma2 = (
            t.detach().cpu().numpy().astype(np.float64)
            for t in (mu1, sigma1, mu2, sigma2)
        )

        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)

        sigma1 = np.atleast_2d(sigma1)
        sigma2 = np.atleast_2d(sigma2)

        assert mu1.shape == mu2.shape, \
            'Training and test mean vectors have different lengths'
        assert sigma1.shape == sigma2.shape, \
            'Training and test covariances have different dimensions'

        diff = mu1 - mu2

        # Product might be almost singular. The call avoids the `disp`
        # keyword, which newer SciPy releases deprecate; without it sqrtm
        # returns just the matrix on every SciPy version.
        covmean = linalg.sqrtm(sigma1.dot(sigma2))
        if not np.isfinite(covmean).all():
            msg = ('fid calculation produces singular product; '
                   'adding %s to diagonal of cov estimates') % eps
            print(msg)
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

        # Numerical error might give a slight imaginary component; only the
        # real part enters the trace.
        if np.iscomplexobj(covmean):
            covmean = covmean.real

        tr_covmean = np.trace(covmean)

        return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean

    def update(self, model, cur_iter):
        """Evaluate FID at the current iteration and record it.

        Args:
            model: Object exposing `netG` and `netEMA`.
            cur_iter: Iteration number, used for printing and logging.

        Returns:
            True when the new FID is strictly lower than every previous one.
        """
        print("--- Iter %s: computing FID ---" % (cur_iter))
        cur_fid = self.compute_fid_with_valid_path(model.netG, model.netEMA)
        self.update_logs(cur_fid, cur_iter)
        print("--- FID at Iter %s: " % cur_iter, "{:.2f}".format(cur_fid))
        is_best = cur_fid < self.best_fid
        if is_best:
            self.best_fid = cur_fid
        return is_best

    def update_logs(self, cur_fid, epoch):
        """Append one FID value to the on-disk log and redraw the FID curve.

        The log is a 2-row array stored as `fid_log.npy` in the FID folder:
        row 0 holds the iteration numbers, row 1 the FID values. The curve is
        written next to it under the name `plot_fid`.

        Args:
            cur_fid: FID value to record.
            epoch: Iteration (x-axis value) the FID belongs to.
        """
        log_path = os.path.join(self.path_to_save, "fid_log.npy")
        try:
            history = np.load(log_path)
            steps = list(history[0, :])
            scores = list(history[1, :])
        except (OSError, EOFError, ValueError, IndexError):
            # No log yet, or one that cannot be read as a 2-row array:
            # start a fresh history rather than abort training.
            steps, scores = [], []
        steps.append(epoch)
        scores.append(cur_fid)

        history = np.array([steps, scores])
        np.save(log_path, history)

        plt.figure()
        plt.plot(history[0, :], history[1, :])
        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in newer Matplotlib releases.
        plt.grid(True, which='major', color='#666666', linestyle='--')
        plt.minorticks_on()
        plt.grid(True, which='minor', color='#999999', linestyle='--', alpha=0.2)
        plt.savefig(os.path.join(self.path_to_save, "plot_fid"), dpi=600)
        plt.close()


def torch_cov(m, rowvar=False):
    '''Estimate the covariance matrix of a set of observations.

    Element `C_{ij}` of the result is the covariance of variables `x_i` and
    `x_j`; the diagonal element `C_{ii}` is the variance of `x_i`. The
    estimate is normalised by `N - 1`, where `N` is the number of
    observations. The input tensor is left unmodified.

    Args:
        m: A 1-D or 2-D tensor of observations. A 1-D tensor is treated as
            a single variable.
        rowvar: If True, each row of `m` is a variable and each column an
            observation. If False (the default), each column is a variable
            and each row an observation; a 2-D input with exactly one row
            is not transposed and is read as one variable.

    Returns:
        The covariance matrix of the variables, with size-1 dimensions
        squeezed away.

    Raises:
        ValueError: If `m` has more than 2 dimensions.
        ZeroDivisionError: If there is only one observation.
    '''
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    # Centre into a new tensor: `m` may be a view of the caller's data, so an
    # in-place subtraction would silently alter the caller's tensor.
    centered = m - torch.mean(m, dim=1, keepdim=True)
    centered_t = centered.t()  # if complex: centered.t().conj()
    return fact * centered.matmul(centered_t).squeeze()

FID作用：
在这里插入图片描述
FID计算公式：
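计算公式：$$\mathrm{FID} = \lVert \mu_1 - \mu_2 \rVert^2 + \mathrm{Tr}\left(C_1 + C_2 - 2\,(C_1 C_2)^{1/2}\right)$$
其中 $\mu_1, C_1$ 与 $\mu_2, C_2$ 分别是两组图片特征的均值和协方差矩阵。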

分三步首先求图片的特征均值，接着求图片的协方差矩阵，最后求迹。
1：如何求特征均值：

使用inception-v3可以求特征。
2：如何求协方差矩阵？
协方差矩阵是由协方差和方差组成的。矩阵对角元素是方差，其余元素是协方差。
在这里插入图片描述

3：矩阵的迹就是对角元素相加。
代码：

最重要的就是计算FID，因为FID越小越好，所以当前的FID小于最好的best_FID,那么best_FID就等于当前的FID。
在compute_fid_with_valid_path内部：

将标签输入到生成器中产生RGB图，将生成的RGB图输入到inceptionv3中：
inception参数由block_index决定，根据字典知block_index=3。
在这里插入图片描述

在InceptionV3内部：
首先将输入下采样到(299,299)大小。

接着输入遍历block：

向modulist里面添加block0：

block0由三个卷积组成：将输入图片下采样两倍，输出通道变为64，即(5,64,128,256)；最后经过一个池化，大小变为(5,64,64,128)。这里的“卷积”由一个卷积层、一个BN和一个ReLU组成。
在这里插入图片描述
接着添加block1:

3：添加block2：

4：添加block3，则block里面有四个sequential。

将x输入到inceptionv3，其中将idx等于3的输出添加到outp列表，即x完整经过inception的输出，然后跳出循环。最终输出为(5,2048).

将验证集所有图片经过inceptionv3的结果添加到列表中,一共执行100次循环。
在这里插入图片描述
将列表数据按照batch维度（dim=0）拼接起来：(500,2048)，即对500张验证图片，每一张图片都有一个2048维的特征输出。
接着mu等于沿着batch维度求所有图片的均值，sigma求协方差。则mu=[2048].
协方差：

def torch_cov(m, rowvar=False):
    '''Estimate the covariance matrix of a set of observations.

    Element `C_{ij}` of the result is the covariance of variables `x_i` and
    `x_j`; the diagonal element `C_{ii}` is the variance of `x_i`. The
    estimate is normalised by `N - 1`, where `N` is the number of
    observations. The input tensor is left unmodified.

    Args:
        m: A 1-D or 2-D tensor of observations. A 1-D tensor is treated as
            a single variable.
        rowvar: If True, each row of `m` is a variable and each column an
            observation. If False (the default), each column is a variable
            and each row an observation; a 2-D input with exactly one row
            is not transposed and is read as one variable.

    Returns:
        The covariance matrix of the variables, with size-1 dimensions
        squeezed away.

    Raises:
        ValueError: If `m` has more than 2 dimensions.
        ZeroDivisionError: If there is only one observation.
    '''
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    # Centre into a new tensor: `m` may be a view of the caller's data, so an
    # in-place subtraction would silently alter the caller's tensor.
    centered = m - torch.mean(m, dim=1, keepdim=True)
    centered_t = centered.t()  # if complex: centered.t().conj()
    return fact * centered.matmul(centered_t).squeeze()

首先将m进行转置。m变为(2048,500)。
接着求fact=1/(500-1)，对应于协方差公式中的分母。然后m = m - torch.mean(m, dim=1, keepdim=True)，对应于公式分子括号里的x减去x的均值。m大小为(2048,500)，torch.mean(m, dim=1, keepdim=True)大小为(2048,1)，两者相减时将(2048,1)广播到(2048,500)。
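接着令mt = m.t()，得到m的转置，大小为(500,2048)。
最后用fact乘以m与mt的矩阵乘积（m.matmul(mt)，结果大小为(2048,2048)），其中m已经减去了均值，mt是m的转置；矩阵乘法本身就包含了公式中的求和。对应公式：
$$\mathrm{Cov}(x_i, x_j) = \frac{1}{N-1}\sum_{k=1}^{N}\left(x_{ik}-\bar{x}_i\right)\left(x_{jk}-\bar{x}_j\right)$$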

计算完mu,sigma就该计算self.m1和self.s1了。
在这里插入图片描述

与之前不同的是，这时输入inception的是真实图片image，而不再是生成的图片；其余操作与处理生成图片时一致。
将之前的四个输出进行FID计算：

    def numpy_calculate_frechet_distance(self, mu1, sigma1, mu2, sigma2, eps=1e-6):
        """Frechet distance between two Gaussians, evaluated with NumPy/SciPy.

        Adapted from https://github.com/bioinf-jku/TTUR (stable version by
        Dougal J. Sutherland). For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2):

            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2))

        Args:
            mu1: Torch tensor, mean of the features of the first image set.
            sigma1: Torch tensor, covariance of the features of the first set.
            mu2: Torch tensor, mean of the features of the second image set.
            sigma2: Torch tensor, covariance of the features of the second set.
            eps: Value added to both covariance diagonals when the first
                matrix square root is not finite.

        Returns:
            The Frechet distance as a NumPy float64.

        Raises:
            AssertionError: If the means or the covariances differ in shape.
        """
        # Move to NumPy and widen to float64: the matrix square root of a
        # large covariance product is numerically fragile in single precision.
        mu1, sigma1, mu2, sigma2 = (
            t.detach().cpu().numpy().astype(np.float64)
            for t in (mu1, sigma1, mu2, sigma2)
        )

        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)

        sigma1 = np.atleast_2d(sigma1)
        sigma2 = np.atleast_2d(sigma2)

        assert mu1.shape == mu2.shape, \
            'Training and test mean vectors have different lengths'
        assert sigma1.shape == sigma2.shape, \
            'Training and test covariances have different dimensions'

        diff = mu1 - mu2

        # Product might be almost singular. The call avoids the `disp`
        # keyword, which newer SciPy releases deprecate; without it sqrtm
        # returns just the matrix on every SciPy version.
        covmean = linalg.sqrtm(sigma1.dot(sigma2))
        if not np.isfinite(covmean).all():
            msg = ('fid calculation produces singular product; '
                   'adding %s to diagonal of cov estimates') % eps
            print(msg)
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

        # Numerical error might give a slight imaginary component; only the
        # real part enters the trace.
        if np.iscomplexobj(covmean):
            covmean = covmean.real

        tr_covmean = np.trace(covmean)

        return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean