tensorflow下基于卷积神经网络的图像风格迁移

最新推荐文章于 2023-05-17 22:00:20 发布
caoxionggang
最新推荐文章于 2023-05-17 22:00:20 发布
阅读量1k
点赞数
文章标签：卷积神经网络、风格迁移
本文链接：https://blog.csdn.net/qq_40947321/article/details/87939641
版权
运行环境：Anaconda 下的 Jupyter Notebook（代码使用 tf.Session 等 TensorFlow 1.x 接口）
代码：
from PIL import Image
import numpy as np
import scipy.misc
import scipy.io as sio
import tensorflow as  tf
import os


_vgg_params = None


def vgg_params():
    """Return the pretrained VGG-19 parameters, loading them on first use.

    The parameters are read with ``scipy.io.loadmat`` from the file
    ``imagenet-vgg-verydeep-19.mat`` (relative path; the file has to be
    downloaded separately) and kept in the module-level ``_vgg_params`` so
    that later calls return the same object without touching the disk.
    """
    global _vgg_params
    if _vgg_params is not None:
        return _vgg_params
    _vgg_params = sio.loadmat('imagenet-vgg-verydeep-19.mat')
    return _vgg_params

def vgg19(input_image):
    """Build the convolution/pooling stack of VGG-19 on top of ``input_image``.

    Args:
        input_image: Value fed to the first ``tf.nn.conv2d``. Callers in this
            file pass a one-element list wrapping an image tensor.
            NOTE(review): presumably float32 NHWC with 3 channels - confirm.

    Returns:
        dict mapping each layer name (``conv1_1`` ... ``pool5``, in network
        order) to the tensor available after that layer.

    Note:
        ReLU is applied inside the ``conv`` branch, so every ``convX_Y`` entry
        already holds the rectified activation and the following ``reluX_Y``
        entry is that very same tensor.
    """
    # Five stages of conv/relu pairs, each closed by a pooling layer.
    convs_per_stage = (2, 2, 4, 4, 4)
    layer_names = []
    for stage, conv_count in enumerate(convs_per_stage, start=1):
        for k in range(1, conv_count + 1):
            layer_names.append('conv%d_%d' % (stage, k))
            layer_names.append('relu%d_%d' % (stage, k))
        layer_names.append('pool%d' % stage)

    # Per-layer parameter records; entry ``index`` belongs to
    # ``layer_names[index]``. Per the original author's note the array has 43
    # entries (it also covers the fully connected and softmax layers, which
    # are not used here).
    layer_params = vgg_params()['layers'][0]

    activations = {}
    current = input_image
    for index, layer_name in enumerate(layer_names):
        if layer_name.startswith('conv'):
            kernel, bias = layer_params[index][0][0][0][0]
            # Original author's note: the .mat file stores the kernel with its
            # first two axes swapped relative to the layout used here.
            kernel = np.transpose(kernel, (1, 0, 2, 3))
            current = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(
                        current,
                        tf.constant(kernel),
                        strides=(1, 1, 1, 1),
                        padding='SAME',
                        name=layer_name,
                    ),
                    bias.reshape(-1),
                )
            )
        elif layer_name.startswith('pool'):
            current = tf.nn.max_pool(
                current,
                ksize=(1, 2, 2, 1),
                strides=(1, 2, 2, 1),
                padding='SAME',
            )
        # 'relu' names fall through: the tensor was rectified in the conv step.
        activations[layer_name] = current

    return activations

def content_loss(target_features, content_features):
    """Return the content loss between two feature maps of the same layer.

    The loss is ``tf.nn.l2_loss`` of the element-wise difference, divided by
    height * width * channel read from the static shape of
    ``content_features``. That static shape is also printed while the graph
    is being built.

    Args:
        target_features: Feature map of the image being optimised.
        content_features: Feature map of the content image; its static shape
            must have four known dimensions.

    Returns:
        Scalar tensor holding the normalised loss.
    """
    feature_shape = content_features.get_shape()
    _, height, width, channel = [dim.value for dim in feature_shape]
    print ('content_features.get_shape() : ')
    print (feature_shape)
    difference = target_features - content_features
    return tf.nn.l2_loss(difference) / (height * width * channel)

def style_loss(target_features, style_features):
    """Return the style loss between two feature maps of the same layer.

    Each feature map is reshaped to a 2-D matrix ``F`` with ``channel``
    columns (one column per feature channel of the layer - not RGB), and its
    Gram matrix ``F^T F`` is divided by the number of elements in that
    feature map. The loss is ``tf.nn.l2_loss`` of the difference of the two
    Gram matrices, divided by the target's element count.

    Every Gram matrix is normalised by the element count of its *own*
    feature map, so a style image whose size differs from the target's is
    scaled correctly. When both feature maps have the same shape the result
    equals dividing both by the target's count.

    Args:
        target_features: Feature map of the image being optimised; its static
            shape must have four known dimensions.
        style_features: Feature map of the style image, with the same number
            of channels as ``target_features``.

    Returns:
        Scalar tensor holding the normalised loss.
    """
    _, height, width, channel = [
        dim.value for dim in target_features.get_shape()
    ]
    print ('target_features.get_shape() : ')
    print (target_features.get_shape())
    size = height * width * channel

    # Element count of the style feature map without the leading (batch)
    # axis, mirroring how ``size`` is computed. ``num_elements()`` is None
    # when the static shape is not fully known; fall back to the target's
    # count in that case.
    style_size = style_features.get_shape()[1:].num_elements()
    if style_size is None:
        style_size = size

    target_features = tf.reshape(target_features, (-1, channel))
    target_gram = tf.matmul(tf.transpose(target_features), target_features) / size

    style_features = tf.reshape(style_features, (-1, channel))
    style_gram = tf.matmul(tf.transpose(style_features), style_features) / style_size

    return tf.nn.l2_loss(target_gram - style_gram) / size

# Relative weights of the two loss terms: they steer the optimisation towards
# the style image or towards the content image.
STYLE_WEIGHT = 1
CONTENT_WEIGHT = 1

# Layers compared for style. Original author's note: the more layers listed,
# the more varied the style features that are captured, at the price of more
# iterations.
STYLE_LAYERS = ['relu1_2', 'relu2_2', 'relu3_2']

# Layers compared for content. Original author's note: deeper layers give more
# abstract features, shallower layers keep the content more literal.
CONTENT_LAYERS = ['relu1_2']


def loss_function(style_image, content_image, target_image):
    """Return the total weighted style-transfer loss.

    Each of the three images is run through ``vgg19`` (wrapped in a
    one-element list). The result is the sum of
    ``CONTENT_WEIGHT * content_loss`` over ``CONTENT_LAYERS`` followed by
    ``STYLE_WEIGHT * style_loss`` over ``STYLE_LAYERS``.

    Args:
        style_image: Image providing the style.
        content_image: Image providing the content.
        target_image: Image being optimised.

    Returns:
        The accumulated loss; the float ``0.0`` if both layer lists are empty.
    """
    style_net = vgg19([style_image])
    content_net = vgg19([content_image])
    target_net = vgg19([target_image])

    content_terms = [
        CONTENT_WEIGHT * content_loss(target_net[name], content_net[name])
        for name in CONTENT_LAYERS
    ]
    style_terms = [
        STYLE_WEIGHT * style_loss(target_net[name], style_net[name])
        for name in STYLE_LAYERS
    ]
    # Content terms first, then style terms, starting from 0.0.
    return sum(content_terms + style_terms, 0.0)


def stylize(style_image, content_image, learning_rate=0.1, epochs=100, save_every=100):
    """Optimise a noise image so that it minimises ``loss_function``.

    The target image starts as random normal noise with the shape of
    ``content_image`` and is updated with Adam. The loss is printed on every
    iteration. A snapshot is written to ``./f-neural_me_<step>.jpg`` every
    ``save_every`` steps and always after the last step, so the final result
    is kept even when ``epochs`` is not a multiple of ``save_every``.

    Args:
        style_image: Array holding the style image.
        content_image: Array holding the content image; its ``shape`` defines
            the shape of the generated image.
            NOTE(review): both arrays are expected to be centred by
            subtracting 128, since 128 is added back before saving - confirm
            against callers.
        learning_rate: Learning rate passed to ``tf.train.AdamOptimizer``.
        epochs: Number of optimisation steps.
        save_every: Interval, in steps, between saved snapshots.

    Raises:
        ValueError: If ``save_every`` is smaller than 1.
    """
    if save_every < 1:
        raise ValueError("save_every must be >= 1, got %r" % (save_every,))

    # Image being optimised; starts out as random noise.
    target = tf.Variable(tf.random_normal(content_image.shape), dtype=tf.float32)
    style_input = tf.constant(style_image, dtype=tf.float32)
    content_input = tf.constant(content_image, dtype=tf.float32)

    cost = loss_function(style_input, content_input, target)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        for i in range(epochs):
            _, loss = sess.run([train_op, cost])
            print("iter:%d,loss:%.9f" % (i, loss))

            step = i + 1
            if step % save_every == 0 or step == epochs:
                # Read the variable in its own run call, and only when a
                # snapshot is needed: this avoids copying the image out on
                # every step and guarantees the value is the one produced by
                # the update that just finished.
                target_image = sess.run(target)
                # Undo the -128 centring and clamp to the valid pixel range.
                image = np.clip(target_image + 128, 0, 255).astype(np.uint8)
                Image.fromarray(image).save("./f-neural_me_%d.jpg" % step)


def _load_centered_rgb(path):
    """Load an image file as a float32 RGB array shifted to be centred on 0.

    The file is closed as soon as the pixels have been copied out, and the
    image is converted to RGB so that grayscale, CMYK or RGBA files still
    yield three channels. Original author's note: subtracting 128 centres the
    data on zero, which is meant to speed up convergence.
    """
    with Image.open(path) as img:
        pixels = np.array(img.convert('RGB'))
    return pixels.astype(np.float32) - 128.0


# Image data. Original author's note: the two images must have the same size.
if __name__ == '__main__':
    style = _load_centered_rgb('me.jpg')
    content = _load_centered_rgb('主教.jpg')
    stylize(style, content, 0.5, 100)