FID与LPIPS等图像质量与多样性指标

Andy Dennis

已于 2023-09-03 21:43:36 修改

阅读量2.6k

点赞数 1

分类专栏：人工智能　文章标签：深度学习、计算机视觉、图像生成、GAN、diffusion

于 2022-12-23 17:16:07 首次发布

本文链接：https://blog.csdn.net/weixin_43850253/article/details/128421808

版权

人工智能专栏收录该内容

47 篇文章

订阅专栏

前言

近年来，由于预训练模型的兴起，图像生成领域的客观评价指标开始流行起来。其中，评价生成质量的指标一般是FID或其变体，评价多样性的指标一般是LPIPS等（即计算多个生成结果之间的LPIPS）。

本文会不断更新… 先挖坑系列…

trade-off:
(1) high-fidelity, high-quality images
(2) diverse images

对于这些指标，我们还可以写一个Adder类，用来累加每个样本的指标值并求平均（参考MIMO-UNet）

class Adder(object):
    """Running-sum accumulator used to average a per-sample metric.

    Call the instance with each new value, then read the arithmetic mean
    of everything added so far with ``average()``.
    """

    def __init__(self):
        # Number of values added so far, and their running total.
        self.count, self.num = 0, 0.0

    def reset(self):
        """Forget all accumulated values."""
        self.count, self.num = 0, 0.0

    def __call__(self, num):
        """Add one value to the running total."""
        self.num = self.num + num
        self.count = self.count + 1

    def average(self):
        """Return the mean of the added values.

        Raises ZeroDivisionError when nothing has been added yet.
        """
        total, n_values = self.num, self.count
        return total / n_values

在测试的时候算出每个样本的psnr和ssim之后, 加起来

# Per-sample metrics: both the prediction and the label are moved to the CPU
# before being passed to the metric functions.
# NOTE(review): psnr_calculator, structural_similarity_index_measure, pred_clip
# and label_img are defined outside this snippet (presumably torchmetrics
# callables and torch tensors) -- confirm against the surrounding test loop.
psnr = psnr_calculator(pred_clip.cpu(), label_img.cpu())
ssim = structural_similarity_index_measure(pred_clip.cpu(), label_img.cpu())
# Feed each per-sample value into its accumulator (instances of Adder).
psnr_adder(psnr)
ssim_adder(ssim)

最后再执行以下语句，即可输出整个测试集上的平均指标

# Report the mean of every value accumulated so far, formatted to two decimals.
print('The average PSNR(torchmetrics) is %.2f dB' % (psnr_adder.average()))
print('The average SSIM is %.2f' % (ssim_adder.average()))

2023/09/03 我觉得这个类也不错

class AverageMeter():
    """Track the latest value and the weighted running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, weighted as ``n`` samples.

        ``sum`` grows by ``val * n`` and ``count`` by ``n``; ``avg`` is then
        recomputed as ``sum / count``.
        """
        weighted = val * n
        self.count += n
        self.sum += weighted
        self.val = val
        self.avg = self.sum / self.count

FID

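FID（Fréchet Inception Distance）：将生成图片与真实图片分别送入Inception-v3，提取2048维特征向量，并假设两组特征各自服从高斯分布，再计算这两个分布之间的Fréchet距离：$\mathrm{FID} = \lVert \mu_1 - \mu_2 \rVert_2^2 + \mathrm{Tr}\left(\Sigma_1 + \Sigma_2 - 2\,(\Sigma_1 \Sigma_2)^{1/2}\right)$，其中 $\mu$、$\Sigma$ 分别为特征的均值与协方差。FID越小，说明生成图片的特征分布越接近真实图片。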

def cal_fid(f1, f2):
    """Compute the Frechet distance between two sets of feature vectors.

    Each input is a 2-D array-like of shape (num_samples, feature_dim), one
    row per sample. Both sets are summarised by their mean and covariance
    and the distance is

        ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 . sigma2))

    Args:
        f1: features of the first image set.
        f2: features of the second image set.

    Returns:
        The FID score as a float-like scalar.
    """
    # Function-scope imports keep this snippet runnable on its own.
    import numpy
    from scipy.linalg import sqrtm

    f1 = numpy.asarray(f1, dtype=numpy.float64)
    f2 = numpy.asarray(f2, dtype=numpy.float64)

    # Mean and covariance statistics. rowvar=False because each row is a
    # sample and each column a feature; atleast_2d keeps a single-feature
    # covariance as a 1x1 matrix so the matrix ops below still apply.
    mu1, sigma1 = f1.mean(axis=0), numpy.atleast_2d(numpy.cov(f1, rowvar=False))
    mu2, sigma2 = f2.mean(axis=0), numpy.atleast_2d(numpy.cov(f2, rowvar=False))

    # Sum of squared differences between the means.
    ssdiff = numpy.sum((mu1 - mu2) ** 2.0)

    # Matrix square root of the product of the covariances.
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return a complex result with a tiny imaginary part caused by
    # numerical error; keep only the real part.
    if numpy.iscomplexobj(covmean):
        covmean = covmean.real

    # Final score.
    fid = ssdiff + numpy.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

关于计算FID，这里有份推荐的代码: fid_score_torch.py (lyndonzheng/Pluralistic-Inpainting)

或者这个 https://github.com/mseitzer/pytorch-fid

PSNR和SSIM

以下代码需要gt_dir 和predict_dir 中的图片一一对应 (图片名字一样，例如 gt_dir/001.jpg和 predict_dir/001.jpg)

"""
计算一些性能指标
"""
"""
计算一些性能指标
"""
import os
import math
from cv2 import mean
import numpy as np
import cv2
# from skimage.measure import compare_ssim, compare_psnr
from skimage.metrics import structural_similarity as compare_ssim
from skimage.metrics import peak_signal_noise_ratio as compare_psnr


def cal_dir_psnr_ssim(gt_dir, predit_dir):
    """Print per-image and mean PSNR/SSIM for two directories of images.

    Both directories must contain exactly the same file names; the image
    ``gt_dir/<name>`` is compared with ``predit_dir/<name>``.

    Args:
        gt_dir: directory holding the ground-truth images.
        predit_dir: directory holding the predicted images.

    Raises:
        ValueError: if the two directories do not list the same file names.
        IOError: if OpenCV fails to read one of the images.
    """
    # Local import: only needed to probe the installed scikit-image API.
    import inspect

    # os.listdir() returns entries in arbitrary order, so sort both listings
    # before comparing them; sorting also makes the output order stable.
    predict_lt = sorted(os.listdir(predit_dir))
    gt_lt = sorted(os.listdir(gt_dir))
    if predict_lt != gt_lt:
        raise ValueError(
            'gt_dir and predit_dir must contain the same file names.')

    # scikit-image 0.19 replaced `multichannel=True` with `channel_axis`;
    # pick whichever keyword the installed version actually declares.
    if 'channel_axis' in inspect.signature(compare_ssim).parameters:
        ssim_kwargs = {'channel_axis': -1}
    else:
        ssim_kwargs = {'multichannel': True}

    psnr_lt = []
    ssim_lt = []
    for img_name in predict_lt:
        predit_img_path = os.path.join(predit_dir, img_name)
        gt_img_path = os.path.join(gt_dir, img_name)

        output_img = cv2.imread(predit_img_path)
        target_img = cv2.imread(gt_img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if output_img is None:
            raise IOError('Cannot read image: {}'.format(predit_img_path))
        if target_img is None:
            raise IOError('Cannot read image: {}'.format(gt_img_path))

        psnr_lt.append(compare_psnr(target_img, output_img))
        ssim_lt.append(compare_ssim(target_img, output_img, **ssim_kwargs))

    print('psnr_lt:\n', psnr_lt)
    print('ssim_lt:\n', ssim_lt)
    print('\n *******************************')
    print('mean psnr: ', np.mean(psnr_lt))
    print('mean ssim: ', np.mean(ssim_lt))



# Script entry point: replace the two placeholder strings below with real
# directory paths before running.
if __name__ == '__main__':
    # Directory containing the ground-truth images.
    gt_dir = '【你的ground truth路径，路径下是gt图片】'
    # Directory containing the images produced by the model.
    predit_dir = '【模型预测的图片输出路径】'
    cal_dir_psnr_ssim(gt_dir, predit_dir)

或者参考这个仓库的代码 https://github.com/Ree1s/IDM/blob/main/core/metrics.py

def calculate_psnr(img1, img2):
    """Return the PSNR in dB between two images with peak value 255.

    Both inputs are converted to float64 before the mean squared error is
    taken. Identical images give ``float('inf')``.
    """
    diff = img1.astype(np.float64) - img2.astype(np.float64)
    mse = np.mean(diff**2)
    if mse != 0:
        rmse = math.sqrt(mse)
        return 20 * math.log10(255.0 / rmse)
    # Zero error: PSNR is unbounded.
    return float('inf')


def ssim(img1, img2):
    """Return the mean SSIM of two images using an 11x11 Gaussian window.

    The stabilising constants are derived from a peak value of 255. Inputs
    are converted to float64, and 5 pixels are cropped from every side of
    each filtered map before the statistics are combined.
    """
    c1 = (0.01 * 255)**2
    c2 = (0.03 * 255)**2

    x = img1.astype(np.float64)
    y = img2.astype(np.float64)
    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(gauss_1d, gauss_1d.transpose())

    def _filtered(arr):
        # Gaussian-weighted local mean, cropped to the "valid" region.
        return cv2.filter2D(arr, -1, window)[5:-5, 5:-5]

    mean_x = _filtered(x)
    mean_y = _filtered(y)
    mean_x_sq = mean_x**2
    mean_y_sq = mean_y**2
    mean_xy = mean_x * mean_y
    var_x = _filtered(x**2) - mean_x_sq
    var_y = _filtered(y**2) - mean_y_sq
    cov_xy = _filtered(x * y) - mean_xy

    numerator = (2 * mean_xy + c1) * (2 * cov_xy + c2)
    denominator = (mean_x_sq + mean_y_sq + c1) * (var_x + var_y + c2)
    return (numerator / denominator).mean()


def calculate_ssim(img1, img2):
    '''Calculate SSIM between two images of identical shape.

    Per the original note, the result is meant to match MATLAB's output and
    the images are expected in the range [0, 255].

    A 2-D image is scored directly. A 3-D image with 3 channels is scored
    channel by channel and the per-channel values are averaged. A 3-D image
    with a single channel is scored as a 2-D image.

    Raises:
        ValueError: if the shapes differ, or the input is neither 2-D nor a
            3-D array with 1 or 3 channels.
    '''
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        channels = img1.shape[2]
        if channels == 3:
            # Score each channel separately instead of re-running the same
            # whole-image computation once per loop iteration.
            ssims = [ssim(img1[:, :, i], img2[:, :, i]) for i in range(3)]
            return np.array(ssims).mean()
        if channels == 1:
            return ssim(np.squeeze(img1, axis=2), np.squeeze(img2, axis=2))
    # Reached for any other rank or channel count; previously a 3-D input
    # with an unsupported channel count silently returned None.
    raise ValueError('Wrong input image dimensions.')

从DRSformer的 utils.py 中找到一个看起来还不错的

import numpy as np
import os
import cv2
import math

def calculate_psnr(img1, img2, border=0):
    """Return the PSNR in dB between two same-shaped images (peak 255).

    ``border`` pixels are cropped from every side of both images before the
    comparison. Identical (cropped) images give ``float('inf')``.

    Raises:
        ValueError: if the two images do not have the same shape.
    """
    if img1.shape != img2.shape:
        raise ValueError('Input images must have the same dimensions.')

    height, width = img1.shape[:2]
    rows = slice(border, height - border)
    cols = slice(border, width - border)

    cropped1 = img1[rows, cols].astype(np.float64)
    cropped2 = img2[rows, cols].astype(np.float64)
    mse = np.mean((cropped1 - cropped2)**2)
    if mse != 0:
        return 20 * math.log10(255.0 / math.sqrt(mse))
    # Zero error: PSNR is unbounded.
    return float('inf')


# --------------------------------------------
# SSIM
# --------------------------------------------
def calculate_ssim(img1, img2, border=0):
    '''Calculate SSIM between two images of identical shape.

    Per the original note, the result is meant to match MATLAB's output and
    the images are expected in the range [0, 255].

    ``border`` pixels are cropped from every side of both images first.
    A 2-D image is scored directly. A 3-D image with 3 channels is scored
    channel by channel and the per-channel values are averaged. A 3-D image
    with a single channel is scored as a 2-D image.

    Raises:
        ValueError: if the shapes differ, or the input is neither 2-D nor a
            3-D array with 1 or 3 channels.
    '''
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    h, w = img1.shape[:2]
    img1 = img1[border:h-border, border:w-border]
    img2 = img2[border:h-border, border:w-border]

    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        channels = img1.shape[2]
        if channels == 3:
            ssims = [ssim(img1[:, :, i], img2[:, :, i]) for i in range(3)]
            return np.array(ssims).mean()
        if channels == 1:
            return ssim(np.squeeze(img1, axis=2), np.squeeze(img2, axis=2))
    # Reached for any other rank or channel count; previously a 3-D input
    # with an unsupported channel count silently returned None.
    raise ValueError('Wrong input image dimensions.')


def ssim(img1, img2):
    """Return the mean SSIM of two images using an 11x11 Gaussian window.

    The stabilising constants are derived from a peak value of 255. Inputs
    are converted to float64, and 5 pixels are cropped from every side of
    each filtered map before the statistics are combined.
    """
    C1 = (0.01 * 255)**2
    C2 = (0.03 * 255)**2

    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    kernel_1d = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel_1d, kernel_1d.transpose())

    # Gaussian-weighted local moments, each cropped to the "valid" region.
    mu1, mu2, e_11, e_22, e_12 = [
        cv2.filter2D(plane, -1, window)[5:-5, 5:-5]
        for plane in (img1, img2, img1**2, img2**2, img1 * img2)
    ]

    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2
    sigma1_sq = e_11 - mu1_sq
    sigma2_sq = e_22 - mu2_sq
    sigma12 = e_12 - mu1_mu2

    luminance_contrast = (2 * mu1_mu2 + C1) * (2 * sigma12 + C2)
    normaliser = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
    ssim_map = luminance_contrast / normaliser
    return ssim_map.mean()

def load_img(filepath):
    """Read an image with OpenCV and return it converted from BGR to RGB."""
    bgr_image = cv2.imread(filepath)
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    return rgb_image

def save_img(filepath, img):
    """Convert an RGB image to BGR and write it to ``filepath`` with OpenCV."""
    bgr_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(filepath, bgr_image)

def load_gray_img(filepath):
    """Read an image as grayscale and append a trailing axis at position 2."""
    gray_image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    return np.expand_dims(gray_image, axis=2)

def save_gray_img(filepath, img):
    """Write ``img`` to ``filepath`` with OpenCV, without any colour conversion."""
    cv2.imwrite(filepath, img)