运行环境:anaconda下的jupyter notebook
代码:
from PIL import Image
import numpy as np
import scipy.misc
import scipy.io as sio
import tensorflow as tf
import os
_vgg_params = None
#使用已经训练好的VGGNet19模型的参数,imagenet-vgg-verydeep-19.mat需要自行下载
def vgg_params():
    """Return the pre-trained VGG-19 parameters, loading them on first use.

    The file ``imagenet-vgg-verydeep-19.mat`` (it has to be downloaded
    separately; the path is relative to the current working directory) is
    read with ``scipy.io.loadmat`` the first time this function is called.
    The result is kept in the module-level ``_vgg_params`` cache and every
    later call returns that same object.
    """
    global _vgg_params
    # Fast path: the .mat file has already been parsed.
    if _vgg_params is not None:
        return _vgg_params
    _vgg_params = sio.loadmat('imagenet-vgg-verydeep-19.mat')
    return _vgg_params
def vgg19(input_image):
    """Build the VGG-19 convolutional stack on top of ``input_image``.

    Args:
        input_image: value fed to the first ``tf.nn.conv2d`` call
            (presumably a batch of images laid out as NHWC -- confirm
            against the callers).

    Returns:
        dict mapping every name in the layer list below to the tensor
        produced at that point of the network.

    Note:
        ReLU is applied inside the ``conv`` branch, so the tensor stored
        under a ``convX_Y`` key is already rectified and the matching
        ``reluX_Y`` key stores that very same tensor.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
    )
    # One entry per layer of the stored model. According to the original
    # author's note there are 43 of them (conv, relu, pool, fully connected
    # and softmax); only the leading entries matching layer_names are used.
    stored_layers = vgg_params()['layers'][0]

    activations = {}
    current = input_image
    for index, name in enumerate(layer_names):
        if name.startswith('conv'):
            # Kernel and bias arrays of this convolution.
            kernels, bias = stored_layers[index][0][0][0][0]
            # Per the original author's note the .mat file stores the two
            # spatial kernel axes in the opposite order, so swap them.
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            convolved = tf.nn.conv2d(
                current,
                tf.constant(kernels),
                strides=(1, 1, 1, 1),
                padding='SAME',
                name=name,
            )
            current = tf.nn.relu(tf.nn.bias_add(convolved, bias.reshape(-1)))
        elif name.startswith('pool'):
            current = tf.nn.max_pool(
                current,
                ksize=(1, 2, 2, 1),
                strides=(1, 2, 2, 1),
                padding='SAME',
            )
        # 'relu' names need no work: the activation was applied above, so
        # the current tensor is simply recorded under that name as well.
        activations[name] = current
    return activations
#内容损失函数,就是特征值误差
def content_loss(target_features, content_features):
    """Return the size-normalised L2 loss between two feature maps.

    The static shape of ``content_features`` is unpacked as four dimensions;
    the product of the last three (height * width * channel) is the
    normaliser. The shape is also printed to stdout.

    Args:
        target_features: feature tensor of the image being optimised.
        content_features: feature tensor of the content image; must have a
            fully known rank-4 static shape.

    Returns:
        ``tf.nn.l2_loss(target_features - content_features)`` divided by
        height * width * channel.
    """
    _, height, width, channel = [
        dim.value for dim in content_features.get_shape()
    ]
    print('content_features.get_shape() : ')
    print(content_features.get_shape())
    difference = target_features - content_features
    return tf.nn.l2_loss(difference) / (height * width * channel)
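# Style loss. Each feature map is reshaped into a 2-D matrix of shape (-1, channel): one row per spatial position and one column per feature channel of that layer (not the three RGB channels). The difference between the Gram matrices of the two reshaped maps is returned as the loss.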
def style_loss(target_features, style_features):
    """Return the Gram-matrix mismatch between target and style features.

    Both feature maps are flattened to ``(-1, channel)`` and turned into
    ``channel x channel`` Gram matrices (``F^T F``). Each Gram matrix is
    divided by the element count (height * width * channel) of the feature
    map it was computed from, so the two are comparable even when the style
    image and the generated image differ in size. When both maps have the
    same shape the result is identical to normalising both by the target
    size.

    The static shape of ``target_features`` is printed to stdout.

    Args:
        target_features: feature tensor of the image being optimised; must
            have a fully known rank-4 static shape.
        style_features: feature tensor of the style image taken from the
            same layer (so the channel count matches).

    Returns:
        ``tf.nn.l2_loss(target_gram - style_gram)`` divided by the target
        map's height * width * channel.
    """
    target_shape = target_features.get_shape()
    _, height, width, channel = [dim.value for dim in target_shape]
    print('target_features.get_shape() : ')
    print(target_shape)
    size = height * width * channel

    # Normalise the style Gram matrix by the style map's own element count.
    # Using the target's count (as before) is only right when both images
    # have the same size. Fall back to it if the style shape is not static.
    style_shape = style_features.get_shape()
    if style_shape.is_fully_defined():
        style_size = 1
        for dim in style_shape.as_list()[1:]:  # skip the batch axis
            style_size *= dim
    else:
        style_size = size

    target_flat = tf.reshape(target_features, (-1, channel))
    target_gram = tf.matmul(tf.transpose(target_flat), target_flat) / size
    style_flat = tf.reshape(style_features, (-1, channel))
    style_gram = tf.matmul(tf.transpose(style_flat), style_flat) / style_size
    return tf.nn.l2_loss(target_gram - style_gram) / size
# Names of the vgg19() outputs compared by the content term of the loss.
CONTENT_LAYERS = [
    'relu1_2',
]
# Names of the vgg19() outputs compared by the style term of the loss.
STYLE_LAYERS = [
    'relu1_2',
    'relu2_2',
    'relu3_2',
]
# Multipliers applied to every content-layer / style-layer loss term.
CONTENT_WEIGHT = 1
STYLE_WEIGHT = 1
#损失函数,权重STYLE_WEIGHT和CONTENT_WEIGHT可以控制优化更取趋于风格还是趋于内容
#STYLE_LAYERS的层数越多,就能挖掘出《星夜》越多样的风格特征,此层数应尽可能的多,才更能挖掘出风格图特征,层数越多迭代次数越多
#CONTENT_LAYERS中的隐藏层越深得到的特征越抽象,越浅就表达了目标图中原内容就越具像
def loss_function(style_image, content_image, target_image):
    """Return the combined content + style loss for ``target_image``.

    Each of the three images is wrapped in a one-element list and run
    through its own ``vgg19`` graph. The loss is the sum, in this order, of
    ``CONTENT_WEIGHT * content_loss`` over ``CONTENT_LAYERS`` followed by
    ``STYLE_WEIGHT * style_loss`` over ``STYLE_LAYERS``.

    Args:
        style_image: the style image.
        content_image: the content image.
        target_image: the image being optimised.

    Returns:
        The accumulated loss (starts from the float ``0.0``).
    """
    style_net = vgg19([style_image])
    content_net = vgg19([content_image])
    target_net = vgg19([target_image])

    terms = [
        CONTENT_WEIGHT * content_loss(target_net[name], content_net[name])
        for name in CONTENT_LAYERS
    ]
    terms += [
        STYLE_WEIGHT * style_loss(target_net[name], style_net[name])
        for name in STYLE_LAYERS
    ]
    # Fold left-to-right from 0.0, content terms first, then style terms.
    return sum(terms, 0.0)
#风格转化
def stylize(style_image, content_image, learning_rate=0.1, epochs=100, save_every=100):
    """Optimise a noise image towards the given style and content images.

    A ``tf.Variable`` initialised with random normal noise of
    ``content_image.shape`` is trained with Adam to minimise
    ``loss_function``. The loss is printed after every step. Snapshots are
    written to ``./f-neural_me_<step>.jpg`` (pixel values shifted back by
    +128 and clipped to [0, 255]).

    Args:
        style_image: style image array, converted to a float32 constant.
        content_image: content image array (needs a ``.shape``), converted
            to a float32 constant.
        learning_rate: learning rate passed to ``tf.train.AdamOptimizer``.
        epochs: number of optimisation steps.
        save_every: write a snapshot every ``save_every`` steps; a falsy
            value disables the periodic snapshots. The result of the final
            step is always written, so a run whose ``epochs`` is not a
            multiple of ``save_every`` no longer finishes without output.
    """
    # The image being generated starts out as random noise.
    target = tf.Variable(tf.random_normal(content_image.shape), dtype=tf.float32)
    style_input = tf.constant(style_image, dtype=tf.float32)
    content_input = tf.constant(content_image, dtype=tf.float32)
    cost = loss_function(style_input, content_input, target)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        for i in range(epochs):
            _, loss = sess.run([train_op, cost])
            print("iter:%d,loss:%.9f" % (i, loss))
            step = i + 1
            is_last = step == epochs
            is_periodic = bool(save_every) and step % save_every == 0
            if is_last or is_periodic:
                # Read the variable in its own run() call: fetched together
                # with train_op it may be read before or after the update.
                # This also avoids copying the image out on every step.
                target_image = sess.run(target)
                # Undo the -128 centring and force valid 8-bit pixel values.
                image = np.clip(target_image + 128, 0, 255).astype(np.uint8)
                Image.fromarray(image).save("./f-neural_me_%d.jpg" % step)
#图像数据
#读取两张图片,先用array()将图片转换为数组(整形数组),再用数组的astype方法将数据类型将其转为浮点数组,并且减去128.0,这样就以0为中心,可以加快收敛。两张图大小要相同
if __name__ == '__main__':
    # Load both images as float32 arrays centred on zero (pixel - 128.0).
    # convert('RGB') guarantees three channels, which the first VGG
    # convolution requires, even for grayscale/RGBA/CMYK files; the `with`
    # blocks close the image files once the pixel data has been copied.
    with Image.open('me.jpg') as style_file:
        style = np.array(style_file.convert('RGB')).astype(np.float32) - 128.0
    with Image.open('主教.jpg') as content_file:
        content = np.array(content_file.convert('RGB')).astype(np.float32) - 128.0
    # learning_rate=0.5, epochs=100
    stylize(style, content, 0.5, 100)
#print(content.shape)
#print(style.shape)