1.数据处理 dst.py
训练集选用291, 测试集选用Set5+Set14
import tensorflow as tf
import numpy as np
import glob
from PIL import Image
import skimage.color as sc
def get_rand_aug():
    """Draw the random augmentation parameters for one patch.

    Returns:
        A ``(flip_idx, rot_idx)`` tuple: ``flip_idx`` is 0 or 1 and
        ``rot_idx`` is an integer in ``[0, 3]``. ``augmentation`` uses them
        as the flip switch and the number of 90-degree rotations.
    """
    # Tuple elements are evaluated left to right: flip first, then rotation.
    return np.random.randint(2), np.random.randint(4)
def randInt(x, size):
    """Return a random start offset for a window of ``size`` along an axis of length ``x``.

    The offset is drawn uniformly from ``[0, x - size]`` inclusive, so the
    window ``[offset, offset + size)`` can reach the last element and a
    window exactly as long as the axis (``x == size``) is valid.

    Args:
        x: length of the axis.
        size: length of the window to place on that axis.

    Returns:
        An integer offset in ``[0, x - size]``.

    Raises:
        ValueError: raised by NumPy when ``size`` is larger than ``x``.
    """
    # randint's upper bound is exclusive, hence the +1.
    return np.random.randint(x - size + 1)
def randPos(h, w, size):
    """Pick a random top-left corner for a ``size`` x ``size`` window.

    Args:
        h: height of the area to sample from.
        w: width of the area to sample from.
        size: side length of the window.

    Returns:
        A ``(y, x)`` tuple; each coordinate is drawn by ``randInt``.
    """
    # Vertical offset is drawn first, then the horizontal one.
    top = randInt(h, size)
    left = randInt(w, size)
    return top, left
def augmentation(img_np, flip_idx, rot_idx):
    """Apply an optional left-right flip followed by 90-degree rotations.

    Args:
        img_np: array to augment.
        flip_idx: truthy to mirror the array left-right before rotating.
        rot_idx: number of 90-degree rotations passed to ``np.rot90``.

    Returns:
        The flipped and rotated array.
    """
    flipped = np.fliplr(img_np) if flip_idx else img_np
    return np.rot90(flipped, rot_idx)
def np_crop(nparr, y, x, h, w=None):
    """Cut an ``h`` x ``w`` window out of ``nparr``.

    Args:
        nparr: array indexed as ``[row, column, ...]``.
        y: first row of the window.
        x: first column of the window.
        h: window height.
        w: window width; defaults to ``h`` (square window) when omitted.

    Returns:
        The slice ``nparr[y:y + h, x:x + w]``.
    """
    # Identity check: only a missing width falls back to a square window.
    if w is None:
        w = h
    return nparr[y:y + h, x:x + w]
def random_crop(path, hr_size):
    """Load a random image and return one randomly augmented square patch.

    Steps: pick a file, read it, cut a random ``hr_size`` x ``hr_size``
    window, then flip/rotate it with random parameters.

    Args:
        path: sequence of image file paths; one entry is picked uniformly.
        hr_size: side length of the square patch in pixels.

    Returns:
        The augmented patch as a NumPy array with 3 channels.
    """
    idx = np.random.randint(len(path))
    # This runs inside an endless generator, so close the file handle
    # deterministically instead of leaving it to the garbage collector.
    # Forcing RGB keeps grayscale / RGBA / palette files compatible with the
    # three-axis unpack below and with the 3-channel shape set in ``cubic``.
    with Image.open(path[idx]) as src:
        img = np.asarray(src.convert('RGB'))
    h, w, _ = img.shape
    y, x = randPos(h, w, hr_size)  # top-left corner of the crop
    hr = np_crop(img, y, x, hr_size)
    hr_flip, hr_rot = get_rand_aug()  # flip switch and rotation count
    return augmentation(hr, hr_flip, hr_rot)
def gen(path, hr_size):
    """Endlessly yield random augmented patches produced by ``random_crop``.

    NOTE: the original author reported that using OpenCV here raised an
    error (cause unknown).

    Args:
        path: sequence of image file paths forwarded to ``random_crop``.
        hr_size: patch side length forwarded to ``random_crop``.

    Yields:
        One patch per iteration; the generator never terminates.
    """
    while True:
        yield random_crop(path, hr_size)
def cubic(hr, lr_size, hr_size):
    """Derive the low-resolution and bicubic-upsampled tensors from an HR batch.

    Args:
        hr: batched HR tensor; its static shape is set in place to
            ``[None, hr_size, hr_size, 3]``.
        lr_size: side length of the low-resolution output.
        hr_size: side length of the HR input and of the upsampled output.

    Returns:
        ``(lr, upsampled, hr)``: the bicubic-downscaled batch, that batch
        resized back to ``hr_size``, and the unchanged HR batch.
    """
    hr.set_shape([None, hr_size, hr_size, 3])
    bicubic = tf.image.ResizeMethod.BICUBIC
    # Antialiasing is enabled for the downscale only.
    low_res = tf.image.resize(hr, [lr_size, lr_size], method=bicubic, antialias=True)
    upsampled = tf.image.resize(low_res, [hr_size, hr_size], method=bicubic)
    return low_res, upsampled, hr
def get_iter(path, batch_size, lr_size, hr_size):
    """Build an endless iterator of ``(lr, upsampled, hr)`` training batches.

    Args:
        path: sequence of image file paths handed to ``gen``.
        batch_size: number of patches per batch.
        lr_size: side length of the low-resolution patches.
        hr_size: side length of the high-resolution patches.

    Returns:
        A Python iterator over the dataset; each item is the tuple returned
        by ``cubic`` for one batch.
    """
    def to_triplet(hr_batch):
        # Turn a batch of HR patches into (lr, upsampled, hr).
        return cubic(hr_batch, lr_size, hr_size)

    dataset = (
        tf.data.Dataset.from_generator(gen, output_types=tf.float32, args=(path, hr_size))
        .batch(batch_size)  # elements become (batch, ...)
        .map(to_triplet)
        # Prefetch so upcoming batches are prepared while the current one is consumed.
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
    return iter(dataset)
def get_test(path):
    """Load every test image into memory as an RGB NumPy array.

    Args:
        path: iterable of image file paths.

    Returns:
        A list with one ``(H, W, 3)`` array per path, in input order; an
        empty list when ``path`` is empty.
    """
    imgs = []
    # A distinct loop name avoids shadowing the ``path`` parameter.
    for img_path in path:
        # Close each file once its pixels are copied. Forcing RGB keeps
        # grayscale files compatible with callers that unpack three axes.
        with Image.open(img_path) as img:
            imgs.append(np.array(img.convert('RGB')))
    return imgs
def normalize(img):
    """Map pixel values from the 0..255 range to -1..1."""
    unit = img / 255
    return (unit - 0.5) * 2
def unnormalize(img):
    """Inverse of ``normalize``: map values from -1..1 back to 0..255."""
    shifted = img + 1
    return shifted * 0.5 * 255
def clip255(x):
    """Clamp every value of ``x`` to the range 0..255."""
    return tf.clip_by_value(x, clip_value_min=0, clip_value_max=255)
def modular_crop(img_np, scale):
    """Crop an image so that its height and width are divisible by ``scale``.

    Args:
        img_np: array with three axes (height, width, channels).
        scale: integer factor the output size must be a multiple of.

    Returns:
        The top-left region whose height and width are the largest
        multiples of ``scale`` that fit in the input.
    """
    height, width, _ = img_np.shape
    # Drop the remainder rows/columns at the bottom/right edge.
    cropped_h = height - height % scale
    cropped_w = width - width % scale
    return np_crop(img_np, 0, 0, cropped_h, cropped_w)
def resize_img(img_np, height, width, resample=Image.BICUBIC):
    """Resize an image array with PIL.

    Args:
        img_np: image array accepted by ``Image.fromarray``.
        height: target height in pixels.
        width: target width in pixels.
        resample: PIL resampling filter; bicubic by default.

    Returns:
        The resized image as a NumPy array.
    """
    pil_img = Image.fromarray(img_np)
    target_size = (width, height)  # PIL takes the size as (width, height)
    return np.array(pil_img.resize(target_size, resample=resample))
def psnr(img1, img2, ycbcr=False, shave=0):
    """Compute the peak signal-to-noise ratio between two 0..255 images.

    Args:
        img1: first image (array-like, values on a 0..255 scale).
        img2: second image, broadcast-compatible with ``img1``.
        ycbcr: when true, both images are scaled to 0..1, converted with
            ``skimage.color.rgb2ycbcr`` and only channel 0 (Y) is compared.
        shave: number of border pixels removed from each side of the first
            two axes before comparing; 0 disables it.

    Returns:
        The PSNR in dB, capped at 100. Identical inputs return the int 100.
    """
    if ycbcr:
        a = sc.rgb2ycbcr(np.float32(img1) / 255)[:, :, 0]
        b = sc.rgb2ycbcr(np.float32(img2) / 255)[:, :, 0]
    else:
        a = np.array(img1).astype(np.float32)
        b = np.array(img2).astype(np.float32)
    if shave:
        a = a[shave:-shave, shave:-shave]
        b = b[shave:-shave, shave:-shave]
    mse = np.mean((a - b) ** 2)
    if mse == 0:
        # log10(0) is undefined; report the cap for identical images.
        return 100
    PIXEL_MAX = 255.0
    # np.log10 replaces the np.math alias, which NumPy 2.0 removed.
    return np.minimum(100.0, 20 * np.log10(PIXEL_MAX) - 10 * np.log10(mse))
if __name__ == '__main__':
    # Manual smoke run: build one training iterator per scale (x2, x3, x4)
    # from the images matched by the glob pattern below.
    PATCH_SIZE = 48
    BATCH_SIZE = 32
    train_path = '../291/*.*'
    train_path_list = glob.glob(train_path)
    ds2_it, ds3_it, ds4_it = (
        get_iter(train_path_list, BATCH_SIZE, PATCH_SIZE // scale, PATCH_SIZE)
        for scale in (2, 3, 4)
    )
2.网络模型
import tensorflow as tf
import numpy as np
from x2.dd.utils import getVariable, normalize, unnormalize, clip255
# def getVariable(shape, name):
# initializer = tf.initializers.GlorotUniform()
# return tf.Variable(initializer(shape=shape), name=name)
class VDSR(tf.keras.Model):
    """20-layer 3x3 convolutional network with a global residual connection.

    Kernel shapes: one 3->64 layer, eighteen 64->64 layers and one 64->3
    layer. Every convolution except the last is followed by ReLU.
    """

    def __init__(self):
        super().__init__()
        # Author's note: the variables must be created in the constructor,
        # and the ``name`` argument must not be omitted.
        init = tf.initializers.GlorotUniform()
        kernel_shapes = [[3, 3, 3, 64]] + [[3, 3, 64, 64]] * 18 + [[3, 3, 64, 3]]
        # Variables are named 'c1' .. 'c20' in layer order.
        self.Kernels = [
            tf.Variable(init(shape=shape), name='c' + str(layer_no))
            for layer_no, shape in enumerate(kernel_shapes, start=1)
        ]

    @tf.function
    def __call__(self, inputs):
        """Run the network on a bicubic-upsampled batch given on a 0..255 scale.

        Returns the sum of the network output and the normalized input; the
        result is not converted back to the 0..255 scale.
        """
        unit_strides = [1, 1, 1, 1]
        no_dilation = [1, 1, 1, 1]
        # Same mapping as dividing by 255, subtracting 0.5 and doubling.
        residual = ((inputs / 255.) - 0.5) * 2
        features = residual
        # Every layer except the last: convolution followed by ReLU.
        for kernel in self.Kernels[:-1]:
            features = tf.nn.conv2d(features, kernel, strides=unit_strides,
                                    padding='SAME', dilations=no_dilation)
            features = tf.nn.relu(features)
        # The final convolution has no activation.
        features = tf.nn.conv2d(features, self.Kernels[-1], strides=unit_strides,
                                padding='SAME', dilations=no_dilation)
        return features + residual
2.VDSR网络
第一层:3x3x3x64
第二–十九层:3x3x64x64
最后一层:3x3x64x3
激活函数 relu
对于更深层的神经网络,推荐使用 he_normal(即 MSRA/He 等人提出的初始化方法)来初始化权重;当网络参数量很大(例如 VGGNet)时,该方法特别有效。注意:下面的示例代码实际使用的是 GlorotUniform 初始化。
import tensorflow as tf
import numpy as np
from x2.dd.utils import getVariable, normalize, unnormalize, clip255
class VDSR(tf.keras.Model):
    """VDSR network: twenty 3x3 convolutions plus a global skip connection.

    Layer 1 maps 3 -> 64 channels, layers 2-19 map 64 -> 64 and layer 20
    maps 64 -> 3. ReLU follows every convolution except the last one.
    """

    def __init__(self):
        super().__init__()
        # Author's note: parameters must be initialised inside the
        # constructor, and each variable needs an explicit name.
        glorot = tf.initializers.GlorotUniform()
        shapes = [[3, 3, 3, 64]]
        shapes.extend([3, 3, 64, 64] for _ in range(18))
        shapes.append([3, 3, 64, 3])
        # Creates the variables 'c1' .. 'c20' in layer order.
        self.Kernels = [
            tf.Variable(glorot(shape=shape), name='c' + str(number))
            for number, shape in enumerate(shapes, start=1)
        ]

    @tf.function
    def __call__(self, inputs):
        """Apply the network to an upsampled batch given on a 0..255 scale.

        The input is normalized, passed through the convolution stack and
        added back to the stack's output; the sum is returned as is.
        """
        def conv(tensor, kernel):
            # 3x3 convolution, stride 1, 'SAME' padding, no dilation.
            return tf.nn.conv2d(tensor, kernel, strides=[1, 1, 1, 1],
                                padding='SAME', dilations=[1, 1, 1, 1])

        skip = ((inputs / 255.) - 0.5) * 2
        hidden = skip
        last_index = len(self.Kernels) - 1
        for index in range(last_index):
            hidden = tf.nn.relu(conv(hidden, self.Kernels[index]))
        # The last layer produces the residual and has no activation.
        return conv(hidden, self.Kernels[last_index]) + skip
3.训练
import numpy as np
import tensorflow as tf
import glob
from dst import get_iter, get_test, normalize, unnormalize, clip255, modular_crop, resize_img, psnr
#from x2.VDSR.networks.vdsr import VDSR
from vdsr import VDSR
# Hyper-parameters
BATCH_SIZE = 32  # patches per batch
PATCH_SIZE = 48  # side length of an HR training patch
EPOCH = 100  # number of epochs
ITER_PER_EPOCH = 50  # training steps per epoch
TOTAL_ITER = EPOCH * ITER_PER_EPOCH  # total number of training steps
SCALES = [2, 3, 4]
LEARNING_RATE = 1e-4  # initial learning rate
LAMBDA = 1e-3  # weight of the constant term in cosine_decay

# Data and checkpoint locations
train_path = '../291/*.*'
test_path1 = '../test/Set5/*.*'
test_path2 = '../test/Set14/*.*'
save_path = './ckpt/vdsr.ckpt'

# Training set: one batch iterator per scale (x2, x3, x4)
train_path_list = glob.glob(train_path)
train_x2, train_x3, train_x4 = (
    get_iter(train_path_list, BATCH_SIZE, PATCH_SIZE // scale, PATCH_SIZE)
    for scale in (2, 3, 4)
)

# Test set: Set5 followed by Set14, loaded into memory
test_path_list = glob.glob(test_path1) + glob.glob(test_path2)
test = get_test(test_path_list)

# Model
model = VDSR()
# model.build(input_shape=[BATCH_SIZE, PATCH_SIZE, PATCH_SIZE, 3])

# Optimizer
opt = tf.optimizers.Adam(learning_rate=LEARNING_RATE)
def cosine_decay(global_step):
    """Return the learning rate for ``global_step`` on a cosine schedule.

    The cosine factor equals 1 at step 0 and 0 at ``TOTAL_ITER``. It is
    blended with ``LAMBDA`` so the result goes from ``LEARNING_RATE`` down
    to ``LEARNING_RATE * LAMBDA``.

    Args:
        global_step: index of the current training step.

    Returns:
        The decayed learning rate.
    """
    cosine = 0.5 * (1 + np.cos(np.pi * global_step / TOTAL_ITER))
    blend = (1 - LAMBDA) * cosine + LAMBDA
    return LEARNING_RATE * blend
def train_step(lr, input2, input3, input4, hr2, hr3, hr4):
    """Run one optimisation step on the x2, x3 and x4 batches together.

    Args:
        lr: learning rate to assign to the optimizer for this step.
        input2, input3, input4: bicubic-upsampled input batches per scale.
        hr2, hr3, hr4: matching HR target batches on a 0..255 scale.

    Returns:
        ``(cost, db2, db3, db4)``: the summed loss and the batch-mean PSNR
        of each scale.
    """
    opt.learning_rate.assign(lr)  # apply the scheduled learning rate
    inputs = (input2, input3, input4)
    targets = (hr2, hr3, hr4)

    with tf.GradientTape() as tape:  # the three scales are trained jointly
        outputs = [model(batch) for batch in inputs]
        # NOTE(review): this is mean absolute error (L1), although an
        # earlier comment labelled it MSE.
        losses = [
            tf.reduce_mean(tf.abs(output - normalize(target)))
            for output, target in zip(outputs, targets)
        ]
        cost = losses[0] + losses[1] + losses[2]

    # Compute the gradients and apply them.
    grads = tape.gradient(cost, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))

    def batch_psnr(output, target):
        # Back to 0..255, clipped; per-image PSNR is capped at 100 before averaging.
        prediction = clip255(unnormalize(output))
        return tf.reduce_mean(tf.math.minimum(tf.image.psnr(prediction, target, 255), 100))

    db2, db3, db4 = (
        batch_psnr(output, target) for output, target in zip(outputs, targets)
    )
    return cost, db2, db3, db4
def validate(scale):
    """Return the mean PSNR of the model over the loaded test images.

    Each image is cropped to a multiple of ``scale``, downscaled and
    upscaled again with ``resize_img``, run through the model, and the
    prediction is compared with the cropped original.

    Args:
        scale: integer super-resolution factor.

    Returns:
        ``np.mean`` of the per-image PSNR values.
    """
    def image_psnr(image):
        hr = modular_crop(image, scale)
        height, width, _ = hr.shape
        lr = resize_img(hr, height // scale, width // scale)
        upcubic = resize_img(lr, height, width)
        batch = np.asarray([upcubic], np.float32)  # batch of one image
        prediction = clip255(unnormalize(model(batch)))
        return psnr(prediction, hr)

    return np.mean([image_psnr(image) for image in test])
def train():
    """Train the model for ``EPOCH`` epochs, logging and saving as it goes.

    Each epoch runs ``ITER_PER_EPOCH`` joint steps over the x2/x3/x4
    batches with the learning rate from ``cosine_decay``, prints the
    epoch-average loss and training PSNR per scale, then prints the test
    PSNR for each scale. Every fifth epoch the model is exported as a
    SavedModel and its weights are written to the checkpoint path.
    """
    for epoch in range(1, EPOCH + 1):
        log_lists = np.zeros(4)  # running sums of cost, db2, db3, db4
        for i in range(ITER_PER_EPOCH):
            # Only the upsampled input and the HR target are used here.
            _, upcubic2, hr2 = next(train_x2)
            _, upcubic3, hr3 = next(train_x3)
            _, upcubic4, hr4 = next(train_x4)
            step = ITER_PER_EPOCH * (epoch - 1) + i
            next_lr = tf.constant(cosine_decay(step), dtype=tf.float32)
            cost, db2, db3, db4 = train_step(
                next_lr, upcubic2, upcubic3, upcubic4, hr2, hr3, hr4)
            log_lists += np.asarray([cost, db2, db3, db4])
        pt = list(log_lists / ITER_PER_EPOCH)
        # The last label is 'db4'; it was previously printed as a second 'db3'.
        print('epoch:', epoch, ', cost:', pt[0], ', db2:', pt[1], ', db3:', pt[2], ', db4:', pt[3])
        # Validation runs every epoch.
        print('x2 test psnr:', validate(2))
        print('x3 test psnr:', validate(3))
        print('x4 test psnr:', validate(4))
        print('*' * 50)
        if epoch % 5 == 0:
            tf.saved_model.save(model, './vdsr_pd')
            model.save_weights('./ckpt/vdsr.ckpt')


# Start training when the script is executed.
train()
训练过程展示
4.测试
bicubic与VDSR对比