前言
近年来,由于预训练模型的兴起,图像生成领域的客观评价指标开始流行起来。其中评价生成质量的指标一般是FID或者其变体,评价多样性的指标一般是LPIPS等(即多个生成结果两两之间的LPIPS)。
本文会不断更新… 先挖坑系列…
trade-off:
(1) high-fidelity, high-quality images
(2) diverse images
对于这些指标,我们还可以写一个Adder类来逐样本累加并求平均(参考MIMO-UNet)
class Adder(object):
    """Accumulate metric values and report their running mean.

    Call the instance with each new value; ``average()`` returns the mean of
    everything added since construction or the last ``reset()``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.count = 0
        self.num = float(0)

    def __call__(self, num):
        """Add one value to the running total."""
        self.count += 1
        self.num += num

    def average(self):
        """Return the mean of the added values, or 0.0 if none were added."""
        if self.count == 0:
            # Nothing accumulated yet: report 0.0 instead of raising
            # ZeroDivisionError.
            return 0.0
        return self.num / self.count
在测试的时候算出每个样本的psnr和ssim之后, 加起来
# Bring prediction and label to the CPU once, then score this sample.
pred_cpu = pred_clip.cpu()
label_cpu = label_img.cpu()
psnr = psnr_calculator(pred_cpu, label_cpu)
ssim = structural_similarity_index_measure(pred_cpu, label_cpu)
# Fold the per-sample scores into the two running accumulators.
psnr_adder(psnr)
ssim_adder(ssim)
最后再执行以下语句,即可输出整个测试集上的平均指标
# Read the means from the two accumulators, then report them.
mean_psnr = psnr_adder.average()
mean_ssim = ssim_adder.average()
print('The average PSNR(torchmetrics) is %.2f dB' % mean_psnr)
print('The average SSIM is %.2f' % mean_ssim)
2023/09/03 我觉得这个类也不错
class AverageMeter():
    """Compute and store the current value and the running average.

    Attributes are ``val`` (last value passed to ``update``), ``sum``
    (weighted sum of all values), ``count`` (total weight) and ``avg``
    (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset all statistics to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average.

        Args:
            val: the new value.
            n: weight of ``val`` (default 1); it is added to ``count``.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. update(..., n=0) on a fresh meter) would
        # divide by zero; keep the average at its reset value instead.
        self.avg = self.sum / self.count if self.count else 0
FID
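生成图片与真实图片分别经过Inception-v3后得到2048维特征向量,FID是这两组特征的分布(各自用均值 $\mu$ 和协方差 $\Sigma$ 描述)之间的Fréchet距离:$\mathrm{FID} = \lVert \mu_1 - \mu_2 \rVert_2^2 + \mathrm{Tr}\left(\Sigma_1 + \Sigma_2 - 2\,(\Sigma_1 \Sigma_2)^{1/2}\right)$,数值越小表示两个分布越接近。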
def cal_fid(f1, f2):
    """Compute the Frechet distance between two sets of feature vectors.

    Each input is reduced to a mean vector and a covariance matrix, and the
    result is ``||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2))``.

    Args:
        f1: 2-D array, one row per sample and one column per feature.
        f2: 2-D array with the same number of columns as ``f1``.

    Returns:
        The Frechet distance as a scalar.
    """
    # Imported locally so the function does not depend on names that the
    # surrounding file may not have imported.
    import numpy
    from scipy.linalg import sqrtm

    # calculate mean and covariance statistics (rowvar=False: rows are samples)
    mu1, sigma1 = f1.mean(axis=0), numpy.cov(f1, rowvar=False)
    mu2, sigma2 = f2.mean(axis=0), numpy.cov(f2, rowvar=False)
    # calculate sum squared difference between means
    ssdiff = numpy.sum((mu1 - mu2) ** 2.0)
    # calculate sqrt of product between cov
    covmean = sqrtm(sigma1.dot(sigma2))
    # check and correct imaginary numbers from sqrt
    if numpy.iscomplexobj(covmean):
        covmean = covmean.real
    # calculate score
    fid = ssdiff + numpy.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
关于计算FID,这里有份推荐的代码: fid_score_torch.py (lyndonzheng/Pluralistic-Inpainting)
或者这个 https://github.com/mseitzer/pytorch-fid
PSNR和SSIM
以下代码需要gt_dir 和predict_dir 中的图片一一对应 (图片名字一样,例如 gt_dir/001.jpg和 predict_dir/001.jpg)
"""
计算一些性能指标
"""
"""
计算一些性能指标
"""
import os
import math
from cv2 import mean
import numpy as np
import cv2
# from skimage.measure import compare_ssim, compare_psnr
from skimage.metrics import structural_similarity as compare_ssim
from skimage.metrics import peak_signal_noise_ratio as compare_psnr
def cal_dir_psnr_ssim(gt_dir, predit_dir):
    """Print per-image and mean PSNR/SSIM for two folders of paired images.

    Both folders must contain exactly the same file names; the image
    ``predit_dir/<name>`` is compared against ``gt_dir/<name>``.

    Args:
        gt_dir: folder holding the ground-truth images.
        predit_dir: folder holding the predicted images.

    Returns:
        Tuple ``(mean_psnr, mean_ssim)`` over all image pairs.

    Raises:
        ValueError: if the two folders do not list the same file names.
        OSError: if OpenCV cannot read one of the images.
    """
    import inspect

    # os.listdir returns entries in arbitrary order, so sort both listings
    # before comparing them.
    predict_lt = sorted(os.listdir(predit_dir))
    gt_lt = sorted(os.listdir(gt_dir))
    if predict_lt != gt_lt:
        raise ValueError(
            'File names differ between {} and {}'.format(gt_dir, predit_dir))

    # Newer scikit-image releases take `channel_axis` in place of the
    # deprecated `multichannel` flag; use whichever the installed function
    # declares.
    if 'channel_axis' in inspect.signature(compare_ssim).parameters:
        ssim_kwargs = {'channel_axis': -1}
    else:
        ssim_kwargs = {'multichannel': True}

    psnr_lt = []
    ssim_lt = []
    for img_name in predict_lt:
        predit_img_path = os.path.join(predit_dir, img_name)
        gt_img_path = os.path.join(gt_dir, img_name)
        output_img = cv2.imread(predit_img_path)
        target_img = cv2.imread(gt_img_path)
        # cv2.imread reports failure by returning None rather than raising.
        if output_img is None:
            raise OSError('Failed to read image: {}'.format(predit_img_path))
        if target_img is None:
            raise OSError('Failed to read image: {}'.format(gt_img_path))
        psnr_lt.append(compare_psnr(target_img, output_img))
        ssim_lt.append(compare_ssim(target_img, output_img, **ssim_kwargs))

    mean_psnr = np.mean(psnr_lt)
    mean_ssim = np.mean(ssim_lt)
    print('psnr_lt:\n', psnr_lt)
    print('ssim_lt:\n', ssim_lt)
    print('\n *******************************')
    print('mean psnr: ', mean_psnr)
    print('mean ssim: ', mean_ssim)
    return mean_psnr, mean_ssim
if __name__ == '__main__':
    # Placeholder paths: replace them with the ground-truth folder and the
    # folder holding the model predictions before running.
    gt_dir, predit_dir = '【你的ground truth路径,路径下是gt图片】', '【模型预测的图片输出路径】'
    cal_dir_psnr_ssim(gt_dir, predit_dir)
或者参考这个仓库的代码 https://github.com/Ree1s/IDM/blob/main/core/metrics.py
def calculate_psnr(img1, img2):
    """Return the PSNR in dB between two images, using a peak value of 255.

    Both images are converted to float64 before the mean squared error is
    taken. Identical images give ``float('inf')``.
    """
    # img1 and img2 have range [0, 255]
    diff = img1.astype(np.float64) - img2.astype(np.float64)
    mse = np.mean(diff**2)
    if mse != 0:
        return 20 * math.log10(255.0 / math.sqrt(mse))
    return float('inf')
def ssim(img1, img2):
    """Return the mean SSIM between two images.

    Local means, variances and the covariance come from filtering with the
    outer product of an 11-tap, sigma 1.5 Gaussian kernel. The stabilising
    constants are derived from a 255 dynamic range.
    """
    c1 = (0.01 * 255)**2
    c2 = (0.03 * 255)**2

    x = img1.astype(np.float64)
    y = img2.astype(np.float64)
    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(gauss_1d, gauss_1d.transpose())

    def local_mean(arr):
        # Filter with the window, then drop a 5-pixel rim on the first two
        # axes ("valid" region).
        return cv2.filter2D(arr, -1, window)[5:-5, 5:-5]

    mu_x = local_mean(x)
    mu_y = local_mean(y)
    mu_x_sq = mu_x**2
    mu_y_sq = mu_y**2
    mu_xy = mu_x * mu_y
    var_x = local_mean(x**2) - mu_x_sq
    var_y = local_mean(y**2) - mu_y_sq
    cov_xy = local_mean(x * y) - mu_xy

    numerator = (2 * mu_xy + c1) * (2 * cov_xy + c2)
    denominator = (mu_x_sq + mu_y_sq + c1) * (var_x + var_y + c2)
    return (numerator / denominator).mean()
def calculate_ssim(img1, img2):
    """Calculate SSIM between two images with values in [0, 255].

    A 2-D image is scored directly. A 3-D image must have 1 or 3 channels on
    its last axis; for 3 channels the per-channel scores are averaged.

    Args:
        img1: first image, 2-D or 3-D array.
        img2: second image, same shape as ``img1``.

    Returns:
        The SSIM score as a scalar.

    Raises:
        ValueError: if the shapes differ, or the input is not 2-D or 3-D
            with 1 or 3 channels.
    """
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        if img1.shape[2] == 3:
            # Score each channel on its own slice, then average. The original
            # loop ignored its index and scored the whole image three times.
            ssims = [ssim(img1[:, :, i], img2[:, :, i]) for i in range(3)]
            return np.array(ssims).mean()
        if img1.shape[2] == 1:
            return ssim(np.squeeze(img1), np.squeeze(img2))
    # Reached for any other rank or channel count; previously a 3-D input
    # with an unsupported channel count could fall through and return None.
    raise ValueError('Wrong input image dimensions.')
从DRSformer的 utils.py 中找到一个看起来还不错的
import numpy as np
import os
import cv2
import math
def calculate_psnr(img1, img2, border=0):
    """Return the PSNR in dB between two images, using a peak value of 255.

    ``border`` pixels are cropped from every side of the first two axes
    before comparison. Identical crops give ``float('inf')``.

    Raises:
        ValueError: if the two images differ in shape.
    """
    # img1 and img2 have range [0, 255]
    if img1.shape != img2.shape:
        raise ValueError('Input images must have the same dimensions.')

    rows, cols = img1.shape[:2]
    crop = (slice(border, rows - border), slice(border, cols - border))
    first = img1[crop].astype(np.float64)
    second = img2[crop].astype(np.float64)

    mse = np.mean((first - second)**2)
    if mse != 0:
        return 20 * math.log10(255.0 / math.sqrt(mse))
    return float('inf')
# --------------------------------------------
# SSIM
# --------------------------------------------
def calculate_ssim(img1, img2, border=0):
    """Calculate SSIM between two images with values in [0, 255].

    ``border`` pixels are cropped from every side of the first two axes
    first. A 2-D image is scored directly. A 3-D image must have 1 or 3
    channels on its last axis; for 3 channels the per-channel scores are
    averaged.

    Args:
        img1: first image, 2-D or 3-D array.
        img2: second image, same shape as ``img1``.
        border: number of pixels to crop from each side (default 0).

    Returns:
        The SSIM score as a scalar.

    Raises:
        ValueError: if the shapes differ, or the input is not 2-D or 3-D
            with 1 or 3 channels.
    """
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    h, w = img1.shape[:2]
    img1 = img1[border:h-border, border:w-border]
    img2 = img2[border:h-border, border:w-border]
    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        if img1.shape[2] == 3:
            # Score each channel separately, then average.
            ssims = [ssim(img1[:, :, i], img2[:, :, i]) for i in range(3)]
            return np.array(ssims).mean()
        if img1.shape[2] == 1:
            return ssim(np.squeeze(img1), np.squeeze(img2))
    # Reached for any other rank or channel count; previously a 3-D input
    # with an unsupported channel count could fall through and return None.
    raise ValueError('Wrong input image dimensions.')
def ssim(img1, img2):
    """Return the mean SSIM between two images.

    Both inputs are converted to float64. Local statistics are taken with
    the outer product of an 11-tap, sigma 1.5 Gaussian kernel, and the
    constants C1 and C2 are derived from a 255 dynamic range.
    """
    C1 = (0.01 * 255)**2
    C2 = (0.03 * 255)**2

    a = img1.astype(np.float64)
    b = img2.astype(np.float64)
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())

    # Filter every required product map the same way, keeping only the
    # region 5 pixels inside the border ("valid" part).
    def smooth(arr):
        return cv2.filter2D(arr, -1, window)[5:-5, 5:-5]

    mean_a = smooth(a)
    mean_b = smooth(b)
    mean_a_sq = mean_a**2
    mean_b_sq = mean_b**2
    mean_ab = mean_a * mean_b
    var_a = smooth(a**2) - mean_a_sq
    var_b = smooth(b**2) - mean_b_sq
    cov_ab = smooth(a * b) - mean_ab

    top = (2 * mean_ab + C1) * (2 * cov_ab + C2)
    bottom = (mean_a_sq + mean_b_sq + C1) * (var_a + var_b + C2)
    ssim_map = top / bottom
    return ssim_map.mean()
def load_img(filepath):
    """Read an image with OpenCV and return it converted from BGR to RGB.

    Raises:
        OSError: if OpenCV cannot read ``filepath``.
    """
    bgr = cv2.imread(filepath)
    if bgr is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise OSError('Failed to read image: {}'.format(filepath))
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
def save_img(filepath, img):
    """Convert ``img`` from RGB to BGR and write it to ``filepath``.

    Returns:
        The status returned by ``cv2.imwrite``, so callers can detect a
        failed write. The status was previously discarded.
    """
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return cv2.imwrite(filepath, bgr)
def load_gray_img(filepath):
    """Read an image as grayscale and append a trailing channel axis.

    Raises:
        OSError: if OpenCV cannot read ``filepath``.
    """
    gray = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError('Failed to read image: {}'.format(filepath))
    # axis=2 turns the 2-D grayscale array into a 3-D one with one channel.
    return np.expand_dims(gray, axis=2)
def save_gray_img(filepath, img):
    """Write ``img`` to ``filepath`` without any colour conversion.

    Returns:
        The status returned by ``cv2.imwrite``, so callers can detect a
        failed write. The status was previously discarded.
    """
    return cv2.imwrite(filepath, img)