SRGAN图片超分辨率代码逐行讲解(深度学习课设1)

对应文章:Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial
Network

github代码链接:GitHub - leftthomas/SRGAN: A PyTorch implementation of SRGAN based on CVPR 2017 paper "Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network"

项目结构在最后。

train.py / model.py / test_image.py / test_benchmask.py / data_utils.py / pyssim.py / loss.py 逐行解析:

train.py

import argparse#命令行选项、参数和子命令解析器
import os
from math import log10

import pandas as pd
import torch.optim as optim
import torch.utils.data
import torchvision.utils as utils
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm #进度条

from Deep_learning.SRGAN.SRGAN import pyssim
from Deep_learning.SRGAN.SRGAN.data_utils import TrainDatasetFromFolder, ValDatasetFromFolder, display_transform
from Deep_learning.SRGAN.SRGAN.loss import GeneratorLoss
from Deep_learning.SRGAN.SRGAN.model import Generator, Discriminator
# argparse in three steps
# [1]: create the parser. The ArgumentParser object holds all the information
# needed to parse the command line into Python data types.
parser = argparse.ArgumentParser(description='Train Super Resolution Models')
# [2]: add arguments. The quoted string is the option name, `default` its default
# value, `type` the value's type and `help` the description shown by --help.
parser.add_argument('--crop_size', default=88, type=int, help='training images crop size')  # HR training patch size
# Super-resolution upscale factor; restricted to powers of two, default 4.
parser.add_argument('--upscale_factor', default=4, type=int, choices=[2, 4, 8],
                    help='super resolution upscale factor')
# Number of training epochs, default 100.
parser.add_argument('--num_epochs', default=100, type=int, help='train epoch number')


if __name__ == '__main__':
    # [3]: parse the command-line arguments into a namespace
    opt = parser.parse_args()

    CROP_SIZE = opt.crop_size  # HR training patch size
    UPSCALE_FACTOR = opt.upscale_factor  # super-resolution scale factor
    NUM_EPOCHS = opt.num_epochs  # total number of training epochs
    # Training dataset: random HR crops plus bicubic-downsampled LR counterparts.
    train_set = TrainDatasetFromFolder('E:/untitled/Deep_learning/SRGAN/SRGAN/data/VOC2012/train', crop_size=CROP_SIZE, upscale_factor=UPSCALE_FACTOR)
    # Validation dataset: (LR, bicubic-restored, HR) triples per image.
    val_set = ValDatasetFromFolder('E:/untitled/Deep_learning/SRGAN/SRGAN/data/VOC2012/val', upscale_factor=UPSCALE_FACTOR)
    # Training loader: 4 workers, batches of 64, shuffled each epoch.
    train_loader = DataLoader(dataset=train_set, num_workers=4, batch_size=64, shuffle=True)
    # Validation loader: one image at a time (validation images vary in size).
    val_loader = DataLoader(dataset=val_set, num_workers=4, batch_size=1, shuffle=False)
    # Generator network for the requested upscale factor.
    netG = Generator(UPSCALE_FACTOR)
    print('# generator parameters:', sum(param.numel() for param in netG.parameters()))

    # Discriminator network.
    netD = Discriminator()
    print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))
    # Combined generator loss (adversarial + perceptual + MSE + TV terms).
    generator_criterion = GeneratorLoss()
    # Move the models and the loss (its VGG feature extractor) to the GPU if available.
    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        generator_criterion.cuda()

    # Adam optimizers with default hyperparameters for both networks.
    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())

    # Per-epoch history of losses, discriminator scores and image metrics (PSNR/SSIM).
    results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [], 'psnr': [], 'ssim': []}

    # One epoch = one full pass over the training set.
    # To resume training, change the start of `range` to the resume epoch.
    for epoch in range(1, NUM_EPOCHS + 1):
        # Progress bar over training batches.
        train_bar = tqdm(train_loader)
        # Running sums for this epoch; divide by batch_sizes for averages.
        running_results = {'batch_sizes': 0, 'd_loss': 0, 'g_loss': 0, 'd_score': 0, 'g_score': 0}

        # When resuming, load the previously saved weights first, e.g.:
        # netG.Load_state_dict(torch.load('netG_epoch_4_75.pth'))
        # netD.Load_state_dict(torch.load('netD_epoch_4_75.pth'))
        # NOTE(review): the method name is `load_state_dict` (lowercase l) — fix before using.

        # Switch to training mode (enables per-batch BatchNorm statistics and Dropout).
        netG.train()
        netD.train()

        """
        train()函数:
        如果模型中有BN层(Batch Normalization)和 Dropout,需要在训练时添加model.train()。
        model.train()是保证BN层能够用到每一批数据的均值和方差。对于Dropout,model.train()是随机取一部分网络连接来训练更新参数。
        Dropout:这是一种算法,用于防止过拟合,通过阻止特征检测器的共同作用来提高神经网络的性能。
        在前向传播的时候,让某个神经元的激活值以一定的概率p停止工作,这样可以使模型泛化性更强,因为它不会太依赖某些局部的特征
        """
        # Training loop over batches.
        for data, target in train_bar:
            g_update_first = True  # NOTE(review): never read afterwards; kept as-is
            batch_size = data.size(0)
            running_results['batch_sizes'] += batch_size  # images processed so far this epoch

            ############################
            # (1) Update D network: maximize D(x)-1-D(G(z))
            # Maximize D's score on real HR images, minimize it on generated SR images.
            ###########################

            # HR: real high-resolution target image
            real_img = Variable(target)
            if torch.cuda.is_available():
                real_img = real_img.cuda()

            # LR: low-resolution input
            z = Variable(data)
            if torch.cuda.is_available():
                z = z.cuda()

            # SR: fake high-resolution image produced by the generator from z
            fake_img = netG(z)
            """
            遍历epochs的过程中依次用到optimizer.zero_grad(),loss.backward()和optimizer.step()三个函数
            (1):先将梯度清零:optimizer.zero_grad():
            (2):然后反向传播计算得到每个参数的梯度值:loss.backward():
            (3):最后通过梯度下降执行一步参数更新:optimizer.step():
            """
            netD.zero_grad()
            # Average the discriminator's scalar output over the batch for real and fake.
            real_out = netD(real_img).mean()
            fake_out = netD(fake_img).mean()
            # Discriminator loss: push real_out toward 1 and fake_out toward 0.
            d_loss = 1 - real_out + fake_out
            # retain_graph=True keeps the graph alive; parts of it are reused below.
            d_loss.backward(retain_graph=True)
            # Apply the discriminator update.
            optimizerD.step()

            ############################
            # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
            # Minimize SR detection probability, perceptual (VGG), image (MSE) and TV losses.
            ###########################
            netG.zero_grad()

            ## The two lines below are added to prevent runtime error in Google Colab ##
            # Regenerate the fake image after D's update so G's loss uses a fresh graph.
            fake_img = netG(z)
            fake_out = netD(fake_img).mean()

            # Generator loss: adversarial + perceptual + image + TV terms; then backprop.
            g_loss = generator_criterion(fake_out, fake_img, real_img)
            g_loss.backward()

            # NOTE(review): this extra forward pass only refreshes the logged
            # fake_out score; it does not influence the parameter update below.
            fake_img = netG(z)
            fake_out = netD(fake_img).mean()

            optimizerG.step()  # apply the generator update

            # loss for current batch before optimization
            running_results['g_loss'] += g_loss.item() * batch_size
            running_results['d_loss'] += d_loss.item() * batch_size
            running_results['d_score'] += real_out.item() * batch_size  # numerator of mean D(x)
            running_results['g_score'] += fake_out.item() * batch_size  # numerator of mean D(G(z))

            # Show this epoch's running averages in the progress bar.
            train_bar.set_description(desc='[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f' % (
                epoch, NUM_EPOCHS, running_results['d_loss'] / running_results['batch_sizes'],
                running_results['g_loss'] / running_results['batch_sizes'],
                running_results['d_score'] / running_results['batch_sizes'],
                running_results['g_score'] / running_results['batch_sizes']))
        # e.g. [1/100] Loss_D: 0.7932 Loss_G: 0.0296 D(x): 0.4958 D(G(z)): 0.2523: 100%|██| 50/50

        # Evaluation mode for validation (freezes BatchNorm statistics / Dropout).
        netG.eval()

        # Output directory for validation result images.
        out_path = 'training_results/SRF_' + str(UPSCALE_FACTOR) + '/'

        # Create the directory if it does not exist yet.
        if not os.path.exists(out_path):
            os.makedirs(out_path)

        with torch.no_grad():
            # Progress bar over validation images; computes metrics and saves grids.
            val_bar = tqdm(val_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
            val_images = []  # collected (bicubic-restored, HR, SR) display triples
            # Each validation item: LR image, bicubic-restored image, HR ground truth.
            for val_lr, val_hr_restore, val_hr in val_bar:
                batch_size = val_lr.size(0)  # number of images in this batch (1 here)
                valing_results['batch_sizes'] += batch_size  # images evaluated so far
                lr = val_lr
                hr = val_hr
                if torch.cuda.is_available():
                    lr = lr.cuda()
                    hr = hr.cuda()
                sr = netG(lr)  # super-resolved output from the generator

                # Accumulate MSE; needed below for PSNR.
                batch_mse = ((sr - hr) ** 2).data.mean()
                valing_results['mse'] += batch_mse * batch_size

                # Accumulate SSIM (structural similarity).
                batch_ssim = pyssim.ssim(sr, hr).item()
                valing_results['ssims'] += batch_ssim * batch_size

                # Running-average PSNR.
                # NOTE(review): uses hr.max()**2 as the peak signal value instead of a
                # fixed 1.0 for [0, 1] images — confirm this is intended.
                valing_results['psnr'] = 10 * log10((hr.max()**2) / (valing_results['mse'] / valing_results['batch_sizes']))

                # Running-average SSIM.
                valing_results['ssim'] = valing_results['ssims'] / valing_results['batch_sizes']

                # Show running metrics in the progress bar.
                val_bar.set_description(
                    desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (
                        valing_results['psnr'], valing_results['ssim']))

                val_images.extend(
                    # Three display images per sample: bicubic restore, HR ground truth, SR.
                    [display_transform()(val_hr_restore.squeeze(0)), display_transform()(hr.data.cpu().squeeze(0)),
                     display_transform()(sr.data.cpu().squeeze(0))])

            # Stack all display images and split into chunks of 15 (5 samples x 3 images).
            # NOTE(review): the integer division assumes at least 15 images; with fewer
            # the chunk count is 0 and torch.chunk raises an error.
            val_images = torch.stack(val_images)
            val_images = torch.chunk(val_images, val_images.size(0) // 15)
            # Progress bar for saving the image grids.
            val_save_bar = tqdm(val_images, desc='[saving training results]')

            # Save comparison grids only every 20 epochs (saves disk space).
            if epoch % 20 == 0:
                index = 1
                for image in val_save_bar:
                    # 3 images per row, 5-pixel padding between them.
                    image = utils.make_grid(image, nrow=3, padding=5)
                    # File name encodes epoch and grid index.
                    utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                    index += 1

        # Checkpoint both networks after every epoch.
        torch.save(netG.state_dict(), 'epochs/netG_epoch_%d_%d.pth' % (UPSCALE_FACTOR, epoch))
        torch.save(netD.state_dict(), 'epochs/netD_epoch_%d_%d.pth' % (UPSCALE_FACTOR, epoch))

        # Record this epoch's averaged statistics.
        results['d_loss'].append(running_results['d_loss'] / running_results['batch_sizes'])
        results['g_loss'].append(running_results['g_loss'] / running_results['batch_sizes'])
        results['d_score'].append(running_results['d_score'] / running_results['batch_sizes'])
        results['g_score'].append(running_results['g_score'] / running_results['batch_sizes'])
        results['psnr'].append(valing_results['psnr'])
        results['ssim'].append(valing_results['ssim'])

        # Dump the accumulated statistics to CSV every 10 epochs.
        if epoch % 10 == 0 and epoch != 0:
            out_path = 'statistics/'
            # One row per completed epoch.
            data_frame = pd.DataFrame(
                data={'Loss_D': results['d_loss'], 'Loss_G': results['g_loss'], 'Score_D': results['d_score'],
                      'Score_G': results['g_score'], 'PSNR': results['psnr'], 'SSIM': results['ssim']},
                index=range(1, epoch + 1))
            data_frame.to_csv(out_path + 'srf_' + str(UPSCALE_FACTOR) + '_train_results.csv', index_label='Epoch')

model.py

import math
import torch
from torch import nn

class Generator(nn.Module):
    """SRGAN generator: a deep residual trunk followed by sub-pixel
    upsampling stages that enlarge the input by ``scale_factor``."""

    def __init__(self, scale_factor):
        # Number of 2x sub-pixel stages needed to reach the requested factor.
        upsample_block_num = int(math.log(scale_factor, 2))
        super(Generator, self).__init__()
        # Head: large 9x9 receptive field with a PReLU activation.
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=9, padding=4),
            nn.PReLU()
        )
        # Residual trunk: five k3n64s1 residual blocks.
        self.block2 = ResidualBlock(64)
        self.block3 = ResidualBlock(64)
        self.block4 = ResidualBlock(64)
        self.block5 = ResidualBlock(64)
        self.block6 = ResidualBlock(64)
        # Post-trunk conv + BN, joined to the head via a long skip connection.
        self.block7 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64)
        )
        # Tail: 2x PixelShuffle stages, then a 9x9 conv back to 3 RGB channels.
        tail = [UpsampleBLock(64, 2) for _ in range(upsample_block_num)]
        tail.append(nn.Conv2d(64, 3, kernel_size=9, padding=4))
        self.block8 = nn.Sequential(*tail)

    def forward(self, x):
        head = self.block1(x)
        out = self.block2(head)
        out = self.block3(out)
        out = self.block4(out)
        out = self.block5(out)
        out = self.block6(out)
        out = self.block7(out)
        # Long skip connection around the residual trunk, then upsample.
        out = self.block8(head + out)
        # Map tanh's [-1, 1] output range into [0, 1] for image pixels.
        return (torch.tanh(out) + 1) / 2


class Discriminator(nn.Module):
    """SRGAN discriminator: a VGG-style conv stack that scores how 'real'
    an input image looks (sigmoid output per batch element)."""

    def __init__(self):
        super(Discriminator, self).__init__()
        # Stem: 3x3 conv into 64 channels, LeakyReLU only (no BN on the first layer).
        layers = [
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(0.2),
        ]
        # Alternating stride-2 (downsample) / stride-1 (widen) conv blocks,
        # each followed by batch norm and LeakyReLU, up to 512 channels.
        for in_ch, out_ch, stride in [(64, 64, 2), (64, 128, 1), (128, 128, 2),
                                      (128, 256, 1), (256, 256, 2), (256, 512, 1),
                                      (512, 512, 2)]:
            layers += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(0.2),
            ]
        # Classification head: global average pool to 1x1, then two 1x1 convs.
        layers += [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(512, 1024, kernel_size=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(1024, 1, kernel_size=1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Flatten the (N, 1, 1, 1) score map to (N,) and squash to a probability.
        return torch.sigmoid(self.net(x).view(x.size(0)))

# Residual block used by the generator trunk.
class ResidualBlock(nn.Module):
    """conv-BN-PReLU-conv-BN residual branch added to the identity input."""

    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.prelu = nn.PReLU()
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        # Residual branch: two 3x3 conv/BN stages with a PReLU in between.
        branch = self.bn2(self.conv2(self.prelu(self.bn1(self.conv1(x)))))
        # Identity shortcut plus the residual branch.
        return x + branch

# Sub-pixel upsampling block.
class UpsampleBLock(nn.Module):
    """Conv expands channels by up_scale**2, PixelShuffle rearranges them
    into an image up_scale times larger, followed by a PReLU activation."""

    def __init__(self, in_channels, up_scale):
        super(UpsampleBLock, self).__init__()
        self.conv = nn.Conv2d(in_channels, in_channels * up_scale ** 2, kernel_size=3, padding=1)
        # Pixel rearrangement with upsampling factor up_scale.
        self.pixel_shuffle = nn.PixelShuffle(up_scale)
        self.prelu = nn.PReLU()

    def forward(self, x):
        # conv -> pixel shuffle -> PReLU.
        return self.prelu(self.pixel_shuffle(self.conv(x)))

test_image.py

把自己的图片放到data文件夹下,图片名替换掉“IMAGE_NAME”即可。

import argparse
import time

import torch
from PIL import Image
from torch.autograd import Variable
from torchvision.transforms import ToTensor, ToPILImage

from Deep_learning.SRGAN.SRGAN.model import Generator

parser = argparse.ArgumentParser(description='Test Single Image')
parser.add_argument('--upscale_factor', default=4, type=int, help='super resolution upscale factor')
parser.add_argument('--test_mode', default='GPU', type=str, choices=['GPU', 'CPU'], help='using GPU or CPU')
parser.add_argument('--image_name', type=str, help='test low resolution image name')
parser.add_argument('--model_name', default='netG_epoch_4_100.pth', type=str, help='generator model epoch name')
opt = parser.parse_args()

UPSCALE_FACTOR = opt.upscale_factor
TEST_MODE = opt.test_mode == 'GPU'  # True -> run inference on the GPU
IMAGE_NAME = opt.image_name
MODEL_NAME = opt.model_name

# Build the generator in inference mode and load the trained weights.
model = Generator(UPSCALE_FACTOR).eval()
if TEST_MODE:
    model.cuda()
    model.load_state_dict(torch.load('epochs/' + MODEL_NAME))
else:
    # map_location lets CPU-only machines load checkpoints that were saved on GPU.
    model.load_state_dict(torch.load('epochs/' + MODEL_NAME, map_location=lambda storage, loc: storage))

# BUG FIX: open the image named on the command line; the original opened the
# literal placeholder path 'data/IMAGE_NAME' and ignored --image_name.
image = Image.open('data/' + IMAGE_NAME)
image = ToTensor()(image).unsqueeze(0)  # add the batch dimension
if TEST_MODE:
    image = image.cuda()

# BUG FIX: time.clock() was removed in Python 3.8; perf_counter() replaces it.
start = time.perf_counter()
# BUG FIX: run the forward pass inside no_grad so no autograd graph is built
# (the original no_grad block only wrapped unsqueeze(0), not the inference).
with torch.no_grad():
    out = model(image)
elapsed = time.perf_counter() - start
print('cost' + str(elapsed) + 's')
out_img = ToPILImage()(out[0].data.cpu())
out_img.save('out_srf_' + str(UPSCALE_FACTOR) + '_' + IMAGE_NAME)

test_benchmask.py

import argparse
import os
from math import log10

import numpy as np
import pandas as pd
import torch
import torchvision.utils as utils
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm
from Deep_learning.SRGAN.SRGAN.lpips import lpips

from Deep_learning.SRGAN.SRGAN import pyssim
from Deep_learning.SRGAN.SRGAN.data_utils import TestDatasetFromFolder, display_transform
from Deep_learning.SRGAN.SRGAN.model import Generator

def main():
    """Evaluate a trained SRGAN generator on the benchmark datasets (Set5,
    Set14, BSD100, Urban100): compute PSNR, SSIM and LPIPS per image, save
    side-by-side comparison images and write a per-dataset summary CSV."""
    parser = argparse.ArgumentParser(description='Test Benchmark Datasets')
    parser.add_argument('--upscale_factor', default=4, type=int, help='super resolution upscale factor')
    parser.add_argument('--model_name', default='netG_epoch_4_100.pth', type=str, help='generator model epoch name')
    opt = parser.parse_args()

    UPSCALE_FACTOR = opt.upscale_factor
    MODEL_NAME = opt.model_name
    # Per-dataset metric accumulators.
    results = {'Set5': {'psnr': [], 'ssim': [], 'lpipsm': []},
               'Set14': {'psnr': [], 'ssim': [], 'lpipsm': []},
               'BSD100': {'psnr': [], 'ssim': [], 'lpipsm': []},
               'Urban100': {'psnr': [], 'ssim': [], 'lpipsm': []}
              }

    # Generator in inference mode.
    model = Generator(UPSCALE_FACTOR).eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if torch.cuda.is_available():
        model = model.cuda()
    # Load the trained generator weights.
    model.load_state_dict(torch.load('epochs/' + MODEL_NAME))
    model = model.eval()
    # Benchmark test data: pre-generated LR images paired with HR targets.
    test_set = TestDatasetFromFolder('data/test', upscale_factor=UPSCALE_FACTOR)
    test_loader = DataLoader(dataset=test_set, num_workers=4, batch_size=1, shuffle=False)
    # Progress bar over the test loader.
    test_bar = tqdm(test_loader, desc='[testing benchmark datasets]')
    # Output directory for the comparison images.
    out_path = 'benchmark_results/SRF_' + str(UPSCALE_FACTOR) + '/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # LPIPS perceptual metric with the AlexNet backbone.
    loss_fn = lpips.LPIPS(net='alex')
    loss_fn = loss_fn.to(device)

    with torch.no_grad():
        for image_name, lr_image, hr_restore_img, hr_image in test_bar:
            # The DataLoader wraps the name in a one-element batch; unwrap it.
            image_name = image_name[0]

            if torch.cuda.is_available():
                lr_image = lr_image.cuda()
                hr_image = hr_image.cuda()

            # Super-resolve and clamp into the valid [0, 1] image range.
            sr_image = model(lr_image).clamp(0.0, 1.0)

            # PSNR with peak value 1.0 (images are in [0, 1]).
            mse = ((hr_image - sr_image) ** 2).mean()
            psnr = 10 * log10(1 / mse)
            ssim = pyssim.ssim(sr_image, hr_image).data.item()
            lpipsm = loss_fn.forward(sr_image, hr_image)

            # Move lpipsm to CPU before converting to numpy
            lpipsm = lpipsm.cpu().item()

            # Three display images: bicubic-restored, HR ground truth, SR output,
            # each passed through display_transform() with the batch dim removed.
            test_images = torch.stack([
                display_transform()(hr_restore_img.squeeze(0)),
                display_transform()(hr_image.data.cpu().squeeze(0)),
                display_transform()(sr_image.data.cpu().squeeze(0))
            ])

            # One row of three images with 5-pixel padding.
            image = utils.make_grid(test_images, nrow=3, padding=5)
            # Save with the metric values embedded in the file name.
            utils.save_image(image, out_path + image_name.split('.')[0] + f'_psnr_{psnr:.4f}_ssim_{ssim:.4f}_lpips_{lpipsm:.4f}.' +
                             image_name.split('.')[-1], padding=5)

            # File names are expected to start with the dataset name, e.g. 'Set5_...'.
            results[image_name.split('_')[0]]['psnr'].append(psnr)
            results[image_name.split('_')[0]]['ssim'].append(ssim)
            results[image_name.split('_')[0]]['lpipsm'].append(lpipsm)
    # Summary CSV location.
    out_path = 'statistics/'
    saved_results = {'psnr': [], 'ssim': [], 'lpipsm': []}

    # Average the metrics for each dataset.
    for item in results.values():
        psnr = np.array(item['psnr'])
        ssim = np.array(item['ssim'])
        lpipsm = np.array(item['lpipsm']).astype(np.float64)
        if (len(psnr) == 0) or (len(ssim) == 0) or (len(lpipsm) == 0):
            # No images for this dataset: record a placeholder instead of NaN.
            psnr = 'No data'
            ssim = 'No data'
            lpipsm = 'No data'
        else:
            # Mean metric values over the dataset.
            psnr = psnr.mean()
            ssim = ssim.mean()
            lpipsm = lpipsm.mean()

        saved_results['psnr'].append(psnr)
        saved_results['ssim'].append(ssim)
        saved_results['lpipsm'].append(lpipsm)

    # One row per dataset, indexed by the dataset name.
    data_frame = pd.DataFrame(saved_results, results.keys())
    data_frame.to_csv(out_path + f'srf_{UPSCALE_FACTOR}_test_results.csv', index_label='DataSet')

if __name__ == '__main__':
    main()

data_utils.py

from os import listdir
from os.path import join

from PIL import Image
from torch.utils.data.dataset import Dataset
from torchvision.transforms import Compose, RandomCrop, ToTensor, ToPILImage, CenterCrop, Resize


def is_image_file(filename):
    """Return True if filename ends with one of the recognized image extensions."""
    return filename.endswith(('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'))

def calculate_valid_crop_size(crop_size, upscale_factor):
    """Largest size <= crop_size that is an exact multiple of upscale_factor."""
    return (crop_size // upscale_factor) * upscale_factor

def train_hr_transform(crop_size):
    """HR training pipeline: random crop_size x crop_size patch -> tensor."""
    steps = [
        RandomCrop(crop_size),  # random patch position for data augmentation
        ToTensor(),
    ]
    return Compose(steps)

def train_lr_transform(crop_size, upscale_factor):
    """LR training pipeline: bicubic-downsample an HR tensor patch by upscale_factor."""
    target_size = crop_size // upscale_factor  # integer downsample target
    steps = [
        ToPILImage(),  # back to a PIL image for Resize
        Resize(target_size, interpolation=Image.BICUBIC),
        ToTensor(),
    ]
    return Compose(steps)


def display_transform():
    """Normalize images for side-by-side display: resize the shorter side to
    400 pixels, then take a 400x400 center crop, then convert to a tensor."""
    steps = [ToPILImage(), Resize(400), CenterCrop(400), ToTensor()]
    return Compose(steps)

# Training image dataset.
class TrainDatasetFromFolder(Dataset):
    """Training dataset: each item is an (LR, HR) tensor pair built from one
    image file — the HR patch is a random crop, and the LR patch is a bicubic
    downsample of that same crop."""

    def __init__(self, dataset_dir, crop_size, upscale_factor):
        super(TrainDatasetFromFolder, self).__init__()
        # Keep only files with an image extension from the dataset directory.
        self.image_filenames = [join(dataset_dir, name)
                                for name in listdir(dataset_dir) if is_image_file(name)]
        # Shrink the crop so it divides evenly by the upscale factor.
        usable_crop = calculate_valid_crop_size(crop_size, upscale_factor)
        # HR pipeline: random crop; LR pipeline: bicubic downsample of the crop.
        self.hr_transform = train_hr_transform(usable_crop)
        self.lr_transform = train_lr_transform(usable_crop, upscale_factor)

    def __getitem__(self, index):
        # HR patch is a fresh random crop; the LR patch is derived from it.
        hr_image = self.hr_transform(Image.open(self.image_filenames[index]))
        lr_image = self.lr_transform(hr_image)
        return lr_image, hr_image

    def __len__(self):
        # One dataset item per image file.
        return len(self.image_filenames)

# Validation image dataset.
class ValDatasetFromFolder(Dataset):
    """Validation dataset: yields (LR, bicubic-restored HR, HR) tensors per
    image, using a center crop sized to the largest multiple of the upscale
    factor that fits inside the image."""

    def __init__(self, dataset_dir, upscale_factor):
        super(ValDatasetFromFolder, self).__init__()
        self.upscale_factor = upscale_factor
        # Keep only files with an image extension.
        self.image_filenames = [join(dataset_dir, name)
                                for name in listdir(dataset_dir) if is_image_file(name)]

    def __getitem__(self, index):
        hr_image = Image.open(self.image_filenames[index])
        w, h = hr_image.size
        # Largest crop (<= shorter side) divisible by the upscale factor.
        crop_size = calculate_valid_crop_size(min(w, h), self.upscale_factor)
        # Bicubic scalers: down to LR size and back up to the crop size.
        lr_scale = Resize(crop_size // self.upscale_factor, interpolation=Image.BICUBIC)
        hr_scale = Resize(crop_size, interpolation=Image.BICUBIC)
        hr_image = CenterCrop(crop_size)(hr_image)
        lr_image = lr_scale(hr_image)        # bicubic-downsampled input
        hr_restore_img = hr_scale(lr_image)  # naive bicubic-upsample baseline
        # Return the three images as tensors.
        return ToTensor()(lr_image), ToTensor()(hr_restore_img), ToTensor()(hr_image)

    def __len__(self):
        return len(self.image_filenames)

# Benchmark test image dataset.
class TestDatasetFromFolder(Dataset):
    """Benchmark test dataset: pairs pre-generated LR images ('.../data/')
    with their HR targets ('.../target/') by index, and also returns a
    bicubic-restored version of the LR image as a baseline."""

    def __init__(self, dataset_dir, upscale_factor):
        super(TestDatasetFromFolder, self).__init__()
        self.lr_path = dataset_dir + '/SRF_' + str(upscale_factor) + '/data/'
        self.hr_path = dataset_dir + '/SRF_' + str(upscale_factor) + '/target/'
        self.upscale_factor = upscale_factor
        # BUG FIX: listdir() order is filesystem-dependent, so the i-th LR file
        # was not guaranteed to match the i-th HR file. Sorting both listings
        # makes the index-based pairing deterministic and correct.
        self.lr_filenames = [join(self.lr_path, x) for x in sorted(listdir(self.lr_path)) if is_image_file(x)]
        self.hr_filenames = [join(self.hr_path, x) for x in sorted(listdir(self.hr_path)) if is_image_file(x)]

    def __getitem__(self, index):
        # File name without the directory part (paths here use '/' separators).
        image_name = self.lr_filenames[index].split('/')[-1]
        lr_image = Image.open(self.lr_filenames[index])
        w, h = lr_image.size
        hr_image = Image.open(self.hr_filenames[index])
        # Bicubic-upsample the LR image to HR size as a naive-restoration baseline.
        hr_scale = Resize((self.upscale_factor * h, self.upscale_factor * w), interpolation=Image.BICUBIC)
        hr_restore_img = hr_scale(lr_image)
        # Return the name plus the three images as tensors.
        return image_name, ToTensor()(lr_image), ToTensor()(hr_restore_img), ToTensor()(hr_image)

    def __len__(self):
        return len(self.lr_filenames)

pyssim.py

from math import exp

import torch
import torch.nn.functional as F
from torch.autograd import Variable

def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of length window_size, normalized to sum to 1.

    The kernel is centered at window_size // 2 with standard deviation sigma.
    """
    center = window_size // 2
    values = [exp(-((i - center) ** 2) / float(2 * sigma ** 2)) for i in range(window_size)]
    kernel = torch.Tensor(values)
    # Normalize so the weights sum to exactly 1.
    return kernel / kernel.sum()

def create_window(window_size, channel):
    """Build a (channel, 1, window_size, window_size) 2-D Gaussian window
    suitable for depthwise (grouped) convolution in the SSIM computation."""
    # 1-D Gaussian (sigma 1.5) as a column vector.
    kernel_1d = gaussian(window_size, 1.5).unsqueeze(1)
    # Outer product of the 1-D kernel with itself gives the 2-D kernel.
    kernel_2d = kernel_1d.mm(kernel_1d.t()).float().unsqueeze(0).unsqueeze(0)
    # One copy of the kernel per channel, contiguous for conv2d.
    window = Variable(kernel_2d.expand(channel, 1, window_size, window_size).contiguous())
    return window

#计算图像的结构相似性指数(SSIM)
def _ssim(img1, img2, window, window_size, channel, size_average=True):
    #对输入的图像 img1 和 img2 进行卷积操作,得到均值 mu1、mu2,以及平方均值 mu1_sq、mu2_sq 和交叉均值 mu1_mu2
    mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
    mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2
    #计算图像的方差 sigma1_sq、sigma2_sq 和交叉方差 sigma12
    sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2

    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    #SSIM 的计算公式
    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
    #返回 SSIM map 的平均值或整个张量
    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1).mean(1).mean(1)


class SSIM(torch.nn.Module):
    """SSIM as a torch module.

    Caches the Gaussian window between calls and rebuilds it only when the
    channel count or the tensor type (dtype/device) of the input changes.
    """

    def __init__(self, window_size=11, size_average=True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        # Cached window starts out built for a single channel.
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        # Channel count of the (batch, channel, height, width) input.
        (_, channel, _, _) = img1.size()

        # Reuse the cached window when channel count and tensor type still match.
        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        # Otherwise rebuild the window for the new channel count / type.
        else:
            window = create_window(self.window_size, channel)

            # Move to img1's GPU (if any) before matching the dtype.
            if img1.is_cuda:
                window = window.cuda(img1.get_device())
            window = window.type_as(img1)

            # Cache for subsequent calls.
            self.window = window
            self.channel = channel
        # Delegate the actual computation to _ssim.
        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)

def ssim(img1, img2, window_size=11, size_average=True):
    """Functional SSIM: build a Gaussian window matching img1 and compute
    the SSIM between the two images."""
    channel = img1.size(1)
    window = create_window(window_size, channel)
    # Match the window's device and dtype to the input images.
    if img1.is_cuda:
        window = window.cuda(img1.get_device())
    window = window.type_as(img1)
    return _ssim(img1, img2, window, window_size, channel, size_average)

loss.py

import torch
from torch import nn
from torchvision.models.vgg import vgg16


class GeneratorLoss(nn.Module):
    """SRGAN generator loss: pixel MSE + adversarial + VGG perceptual + TV.

    The perceptual term compares VGG16 feature maps of generated and target
    images; the VGG network is frozen and used only as a feature extractor.
    """

    def __init__(self):
        super(GeneratorLoss, self).__init__()
        # Frozen feature extractor: first 31 layers of pretrained VGG16,
        # set to eval mode with gradients disabled.
        vgg = vgg16(pretrained=True)
        loss_network = nn.Sequential(*list(vgg.features)[:31]).eval()
        for param in loss_network.parameters():
            param.requires_grad = False
        self.loss_network = loss_network
        self.mse_loss = nn.MSELoss()  # shared MSE for pixel and feature space
        self.tv_loss = TVLoss()       # total-variation smoothness regularizer

    def forward(self, out_labels, out_images, target_images):
        """Combine the four weighted loss terms into one scalar.

        Args:
            out_labels: discriminator scores for the generated images.
            out_images: generator output (super-resolved images).
            target_images: ground-truth high-resolution images.
        """
        # Adversarial loss: push discriminator scores towards 1.
        adversarial_loss = torch.mean(1 - out_labels)
        # Perceptual loss: MSE between VGG feature maps of fake vs. real.
        perception_loss = self.mse_loss(self.loss_network(out_images),
                                        self.loss_network(target_images))
        # Pixel-space reconstruction loss.
        image_loss = self.mse_loss(out_images, target_images)
        # Total-variation loss keeps the output spatially smooth.
        tv_loss = self.tv_loss(out_images)
        # Weights follow the SRGAN reference implementation.
        return (image_loss
                + 0.001 * adversarial_loss
                + 0.006 * perception_loss
                + 2e-8 * tv_loss)


class TVLoss(nn.Module):
    """Total-variation regularizer.

    Penalizes squared differences between neighbouring pixels so generated
    images stay smooth instead of becoming over-pixelated.
    """

    def __init__(self, tv_loss_weight=1):
        super(TVLoss, self).__init__()
        self.tv_loss_weight = tv_loss_weight

    def forward(self, x):
        """Return the weighted mean TV loss over a (N, C, H, W) batch."""
        batch = x.size(0)
        height = x.size(2)
        width = x.size(3)
        # Element counts of the two difference tensors, used as normalizers.
        count_h = self.tensor_size(x[:, :, 1:, :])
        count_w = self.tensor_size(x[:, :, :, 1:])
        # Squared differences between vertically adjacent pixels.
        h_tv = torch.pow(x[:, :, 1:, :] - x[:, :, :height - 1, :], 2).sum()
        # Squared differences between horizontally adjacent pixels.
        w_tv = torch.pow(x[:, :, :, 1:] - x[:, :, :, :width - 1], 2).sum()
        return self.tv_loss_weight * 2 * (h_tv / count_h + w_tv / count_w) / batch

    @staticmethod
    def tensor_size(t):
        # Elements per sample: channels * height * width.
        return t.size(1) * t.size(2) * t.size(3)


if __name__ == "__main__":
    g_loss = GeneratorLoss()
    print(g_loss)


#完整vgg16的所有层数打印结果:
# VGG(
#   (features): Sequential(
#     (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (1): ReLU(inplace)
#     (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (3): ReLU(inplace)
#     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (6): ReLU(inplace)
#     (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (8): ReLU(inplace)
#     (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (11): ReLU(inplace)
#     (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (13): ReLU(inplace)
#     (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (15): ReLU(inplace)
#     (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (18): ReLU(inplace)
#     (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (20): ReLU(inplace)
#     (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (22): ReLU(inplace)
#     (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (25): ReLU(inplace)
#     (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (27): ReLU(inplace)
#     (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (29): ReLU(inplace)
#     (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#   )

项目结构如下:

训练集、验证集为VOC2012。测试集为Set5、Set14、BSD100和Urban100。原作者github上有下载链接。lpips评价指标为改进部分,可以从github上搜一个有lpips的项目,把lpips文件夹复制过来。

引包部分根据自己的项目路径修改。代码是可以运行出来的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值