目录
FID介绍
FID(Fréchet Inception Distance)是一种在机器学习领域,尤其是生成模型评估中常用的度量指标。它由Martin Heusel等人在2017年提出,用于衡量生成模型生成的图像与真实图像分布之间的差异。FID将两组图像在Inception网络中提取到的特征分别近似为多元高斯分布,并根据各自的均值向量和协方差矩阵计算这两个分布之间的Fréchet距离;其值越小,表示生成模型生成的图像与真实图像的分布越接近,即生成图像的质量越高。
FID的计算过程包括以下几个步骤:
1. 从真实数据集和生成模型中分别抽取样本。
2. 使用预训练的Inception网络(通常是Inception-v3)提取这些样本的特征向量。
3. 计算两个分布的均值向量和协方差矩阵。
4. 利用这些统计量计算Fréchet距离,即FID值。
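FID的计算公式为:
$$\mathrm{FID} = \lVert \mu_1 - \mu_2 \rVert_2^2 + \mathrm{Tr}\left(\Sigma_1 + \Sigma_2 - 2\left(\Sigma_1 \Sigma_2\right)^{1/2}\right)$$
其中,$\mu_1$ 和 $\mu_2$ 分别代表真实图像和生成图像特征的均值向量,$\Sigma_1$ 和 $\Sigma_2$ 分别代表两者的协方差矩阵,Tr表示矩阵的迹,即对角线元素的和。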
FID作为一种评估指标,被广泛用于生成模型的训练和评估中,它可以帮助研究者更准确地评估生成模型的质量,并选择更好的模型。同时,FID也是一种客观的评估指标,可以减少人为主观因素对评估结果的影响。
代码实现
eval/FID · even_day/eval - 码云 - 开源中国 (gitee.com)
自建数据集npz文件构建
import pickle
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.nn.functional import adaptive_avg_pool2d
from torch.utils import data
from inception import InceptionV3
import os
# Disabled workaround: presumably meant to silence the "duplicate OpenMP
# runtime" abort seen on some setups -- TODO confirm before re-enabling.
# os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Expose only the GPU with index 7 to this process. This is machine-specific;
# adjust or remove it when running on a host with a different GPU layout.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
class Dataset(data.Dataset):
    """Image dataset whose members are listed in ``<path>/test/filenames.pickle``.

    Every item is the image stored at the corresponding path, converted to RGB
    and, when a transform was supplied, passed through that transform.
    """

    def __init__(self, path, transform=None):
        """Collect the image paths for the dataset rooted at ``path``.

        Params:
        -- path      : Dataset root directory. It must contain
                       ``test/filenames.pickle``.
        -- transform : Optional callable applied to each RGB PIL image in
                       ``__getitem__``.
        """
        super().__init__()
        self.file_names = self.get_filenames(path)
        self.transform = transform

    def __len__(self):
        """Return the total number of samples."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Load the image at position ``index`` and return it (transformed if requested)."""
        # The context manager releases the underlying file handle
        # deterministically, even when decoding fails; convert() returns an
        # independent, fully loaded copy that stays valid after the file closes.
        with Image.open(self.file_names[index]) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img

    def get_filenames(self, path):
        """Return the list of ``.jpg`` image paths named in the pickle file.

        The pickle is expected to hold an iterable of file names without
        extension. When ``path`` contains the substring ``'birds'`` the images
        are looked up under ``CUB_200_2011/images/``; otherwise under
        ``images/``.
        """
        data_path = f'{path}/test/filenames.pickle'
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at filename lists from a trusted source.
        with open(data_path, 'rb') as f:
            names = pickle.load(f)
        if 'birds' in path:
            image_dir = path + '/CUB_200_2011/images/'
        else:
            image_dir = path + '/images/'
        return [image_dir + name + '.jpg' for name in names]
def get_activations(images, model, batch_size=64, dims=2048, cuda=False, verbose=True):
    """Run every batch through the model and collect one feature row per image.

    Params:
    -- images      : Iterable of image batches (the script passes a
                     ``DataLoader``). Each batch is a tensor that the model
                     accepts; presumably shaped (n, 3, h, w) with values in
                     [0, 1] -- confirm against the model's expectations.
    -- model       : Instance of the inception model. ``model(batch)[0]`` must
                     be a 4-D tensor whose second dimension is ``dims``.
    -- batch_size  : Kept for backward compatibility. The number of rows
                     written per batch is taken from the batch itself, so a
                     shorter final batch is handled correctly.
    -- dims        : Dimensionality of features returned by the model.
    -- cuda        : If set to True, batches are moved to the GPU first.
    -- verbose     : If set to True, prints ' done' when finished.

    Returns:
    -- A float64 numpy array of shape (num images, dims) holding the
       activations, in iteration order. An empty iterable yields an array of
       shape (0, dims).

    Raises:
    -- ValueError if the model produces a feature width different from ``dims``.
    """
    model.eval()

    features = []
    # Inference only: without no_grad() every forward pass would keep its
    # autograd graph alive and waste memory.
    with torch.no_grad():
        for batch in images:
            if cuda:
                batch = batch.cuda()

            pred = model(batch)[0]

            # If the model output still has a spatial extent, apply global
            # average pooling. This happens for dimensionalities other than 2048.
            if pred.shape[2] != 1 or pred.shape[3] != 1:
                pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

            # Use the real batch length, not batch_size, so a final partial
            # batch (drop_last=False) does not break the reshape.
            features.append(pred.cpu().numpy().reshape(pred.shape[0], -1))

    if features:
        # float64 matches the dtype of the buffer previously created with np.empty.
        pred_arr = np.concatenate(features, axis=0).astype(np.float64)
    else:
        pred_arr = np.empty((0, dims))

    if pred_arr.shape[1] != dims:
        raise ValueError(
            'Model returned %d features per image, expected %d'
            % (pred_arr.shape[1], dims))

    print('Dataset length: ', pred_arr.shape[0])
    if verbose:
        print(' done')

    return pred_arr
def calculate_activation_statistics(images, model, batch_size=64,
                                    dims=2048, cuda=False, verbose=True):
    """Compute the mean and covariance of the model activations used by the FID.

    Params:
    -- images      : Image batches, forwarded unchanged to ``get_activations``
                     (the script passes a ``DataLoader``).
    -- model       : Instance of the inception model.
    -- batch_size  : Batch size, forwarded to ``get_activations``.
    -- dims        : Dimensionality of the features returned by the model.
    -- cuda        : If set to True, use the GPU.
    -- verbose     : Forwarded to ``get_activations`` to control its progress
                     output.

    Returns:
    -- mu    : Per-feature mean of the activations over all samples.
    -- sigma : Covariance matrix of the activations, with each column treated
               as one variable (``rowvar=False``).
    """
    activations = get_activations(images, model, batch_size, dims, cuda, verbose)
    return np.mean(activations, axis=0), np.cov(activations, rowvar=False)
if __name__ == '__main__':
    # Script entry point: compute the Inception activation statistics (mean and
    # covariance) of the images listed for the dataset and store them in an
    # .npz file with the keys 'mu' and 'sigma'.
    path = '../../../data/birds'
    batch_size = 32
    cuda = torch.cuda.is_available()
    dims = 2048
    output_file = '../../../data/birds/npz/bird_val256_FIDK0.npz'

    # Create the target directory up front so a missing folder cannot make
    # np.savez fail after the whole (expensive) feature-extraction pass.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # NOTE(review): RandomCrop picks a random window, so for non-square images
    # the saved statistics can differ slightly between runs; the final
    # Resize((299, 299)) looks redundant after a 299x299 crop. Confirm whether
    # a deterministic crop is wanted before changing the pipeline.
    dataset = Dataset(path, transforms.Compose([
        transforms.Resize(299),
        transforms.RandomCrop(299),
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
    ]))
    print(len(dataset))

    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
    model = InceptionV3([block_idx])
    if cuda:
        model.cuda()

    # drop_last=True keeps every batch at exactly batch_size samples; the
    # trailing len(dataset) % batch_size images are not part of the statistics.
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=True,
        num_workers=8,
    )
    m, s = calculate_activation_statistics(dataloader, model, batch_size, dims, cuda)

    np.savez(output_file, mu=m, sigma=s)
    print('保存成功')