[Andrew Ng Deep Learning Programming Assignment] 4.4 Special Applications: Face Recognition and Neural Style Transfer (issue unresolved)

References: 1. Face Recognition and Neural Style Transfer; 2. Neural Style Transfer Programming Assignment

The problem I ran into with neural style transfer has been solved, and the fix is recorded in the code comments. Face recognition, however, still fails as soon as execution reaches the database construction, and I have not found a solution yet. I suspect a version mismatch: my machine's CUDA version is 9.1.84, while tensorflow-gpu 2.3.1 requires CUDA 10.1 or later, so TensorFlow cannot use the GPU and falls back to the CPU; since the model is built with channels_first (NCHW) data, and the CPU MaxPool op only supports NHWC, the call fails. I don't want to uninstall and reinstall TensorFlow again.
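A minimal way to confirm this hypothesis (a sketch, assuming TensorFlow 2.x):

import tensorflow as tf

print(tf.__version__)                          # e.g. 2.3.1
# An empty list here means TensorFlow cannot use the GPU, so the channels_first
# (NCHW) MaxPool falls back to the CPU implementation, which only supports NHWC.
print(tf.config.list_physical_devices('GPU'))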

Error message

Traceback (most recent call last):
  File "G:/Project/PYTHON/Demo01/Deep_Learning/test4_4/人脸识别.py", line 108, in <module>
    database["danielle"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/danielle.png", FRmodel)
  File "G:\Project\PYTHON\Demo01\Deep_Learning\test4_4\fr_utils.py", line 198, in img_to_encoding
    embedding = model.predict_on_batch(x_train)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 1214, in predict_on_batch
    outputs = self.predict_function(inputs)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\backend.py", line 3822, in __call__
    self._make_callable(feed_arrays, feed_symbols, symbol_vals, session)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\backend.py", line 3759, in _make_callable
    callable_fn = session._make_callable_from_options(callable_opts)
  File "F:\Python\lib\site-packages\tensorflow\python\client\session.py", line 1505, in _make_callable_from_options
    return BaseSession._Callable(self, callable_options)
  File "F:\Python\lib\site-packages\tensorflow\python\client\session.py", line 1460, in __init__
    session._session, options_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Default MaxPoolingOp only supports NHWC on device type CPU
	 [[{{node max_pooling2d/MaxPool}}]]

1. Face Verification and Face Recognition

main.py

"""
    代码实现:
        1.实现三元组损失函数
        2.使用一个已经训练好了的模型来将人脸图像映射到一个128位数字的的向量
        3.使用这些编码来执行人脸验证和人脸识别。
"""
from keras.models import Sequential, Model
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D,AveragePooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform
from keras.engine.topology import Layer
from keras import backend as K

# For visualizing model details
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

K.set_image_data_format('channels_first')
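# Note: channels_first means NCHW tensors. On the CPU, MaxPool only supports NHWC,
# which is why the model fails once TensorFlow falls back to the CPU (see the error
# message above).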


import time
import cv2
import os
os.environ['CUDA_VISIBLE_DEVICES']='0'  # Select the GPU device
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import Deep_Learning.test4_4.fr_utils
from Deep_Learning.test4_4.inception_blocks_v2 import *

# Print arrays in full
# np.set_printoptions(threshold=np.nan)     # older NumPy versions
np.set_printoptions(threshold=np.inf)



# Encode face images into 128-dimensional vectors with a ConvNet: input (m,n_c,n_h,n_w)=(m,3,96,96), output (m,128)

# Build the model
FRmodel = faceRecoModel(input_shape=(3, 96, 96))
# Print the total number of parameters
print("Total params: " + str(FRmodel.count_params()))    # Total params: 3743280
# Plot the model
plot_model(FRmodel, to_file="FRmodel.png")
SVG(model_to_dot(FRmodel).create(prog='dot', format='svg'))
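# Note: plot_model and model_to_dot require the pydot and Graphviz packages to be installed.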


# Define the triplet loss function
def triplet_loss(y_true, y_pred, alpha=0.2):
    """
    Implements the triplet loss.
    :param y_true:      -true labels; Keras requires this argument when defining a loss, but it is not used here
    :param y_pred:      -list containing:
                            anchor    -encodings of the anchor images, shape (None, 128)
                            positive  -encodings of the positive images, shape (None, 128)
                            negative  -encodings of the negative images, shape (None, 128)
    :param alpha:       -hyperparameter, the margin
    :return: loss       -real number, the value of the loss
    """

    # Retrieve the anchor, positive and negative encodings
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    # 1. Compute the distance between the anchor and positive encodings
    # (note: the original assignment uses tf.reduce_sum here; reduce_mean only
    # rescales the squared distances by 1/128, but the printed loss value differs)
    pos_dist = tf.reduce_mean(tf.square(tf.subtract(anchor, positive)), axis=-1)

    # 2. Compute the distance between the anchor and negative encodings
    neg_dist = tf.reduce_mean(tf.square(tf.subtract(anchor, negative)), axis=-1)

    # 3. Subtract the two distances and add alpha
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)

    # Take the maximum with 0 and sum over the training examples to get the full formula
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0))

    return loss

# Test triplet_loss
print("=====================Testing triplet_loss===============")
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    y_true = (None, None, None)
    y_pred = (tf.compat.v1.random_normal([3, 128], mean=6, stddev=0.1, seed=1),
              tf.compat.v1.random_normal([3, 128], mean=1, stddev=1, seed=1),
              tf.compat.v1.random_normal([3, 128], mean=3, stddev=4, seed=1))
    loss = triplet_loss(y_true, y_pred)
    print("loss = " + str(loss.eval()))     # loss = 4.522995


# Load the pretrained model
start_time = time.perf_counter()   # start time (time.clock() was removed in Python 3.8)
# Compile the model
FRmodel.compile(optimizer='adam', loss=triplet_loss, metrics=['accuracy'])
# Load the weights
Deep_Learning.test4_4.fr_utils.load_weights_from_FaceNet(FRmodel)
end_time = time.perf_counter()     # end time
# Compute the elapsed time
elapsed = end_time - start_time
print("Elapsed: " + str(int(elapsed / 60)) + " min " + str(int(elapsed % 60)) + " s")   # Elapsed: 1 min 16 s


# ???????????? Execution gets stuck here (see the error message above)
# Build the face verification database; img_to_encoding(image_path, model) runs a forward pass of the model on the image to produce its encoding
database = {}
database["danielle"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/danielle.png", FRmodel)
database["younes"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/younes.jpg", FRmodel)
database["tian"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/tian.jpg", FRmodel)
database["andrew"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/andrew.jpg", FRmodel)
database["kian"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/kian.jpg", FRmodel)
database["dan"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/dan.jpg", FRmodel)
database["sebastiano"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/sebastiano.jpg", FRmodel)
database["bertrand"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/bertrand.jpg", FRmodel)
database["kevin"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/kevin.jpg", FRmodel)
database["felix"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/felix.jpg", FRmodel)
database["benoit"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/benoit.jpg", FRmodel)
database["arnaud"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/arnaud.jpg", FRmodel)


# Face verification: check that the camera photo matches the name on the ID card
def verify(image_path, identity, database, model):
    """
    Verifies the encoding of "image_path" against the encoding stored for "identity".
    :param image_path:  -path to the camera photo
    :param identity:    -string, name of the person to verify
    :param database:    -dict mapping member names to their encodings
    :param model:       -a Keras model instance
    :return: dist       -distance between the encodings of the camera photo and the database image
             is_door_open    -boolean, whether the door should be opened
    """

    # 1. Compute the encoding of the image with fr_utils.img_to_encoding()
    encoding = Deep_Learning.test4_4.fr_utils.img_to_encoding(image_path, model)

    # 2. Compute the L2 distance to the encoding stored in the database
    dist = np.linalg.norm(encoding - database[identity])

    # 3. Decide whether to open the door
    if dist < 0.7:
        print("Welcome home, " + str(identity))
        is_door_open = True
    else:
        print("Verification failed: you are not " + str(identity))
        is_door_open = False

    return dist, is_door_open

print("======================测试verity===================")
# ??????????切片不正确,可能是版本问题导致的
verity("images1/camera_0.jpg", "younes", database, FRmodel)
verity("images1/camera_2.jpg", "kian", database, FRmodel)


# Face recognition
def who_is_it(image_path, database, model):
    """
    Performs face recognition on the specified image.
    :param image_path:  -path to the image
    :param database:    -dict mapping names to encodings
    :param model:       -a Keras model instance
    :return: min_dist   -distance of the closest database encoding to the specified image
             identity   -string, the name corresponding to min_dist
    """

    # 1. Compute the encoding of the image with fr_utils.img_to_encoding()
    encoding = Deep_Learning.test4_4.fr_utils.img_to_encoding(image_path, model)

    # 2. Find the closest encoding
    # 2.1 Initialize min_dist to a sufficiently large number, here 100
    min_dist = 100

    # 2.2 Loop over the database to find the closest encoding
    for (name, db_enc) in database.items():
        # 2.2.1 Compute the L2 distance between the target encoding and the current database encoding
        dist = np.linalg.norm(encoding - db_enc)
        # 2.2.2 If the distance is smaller than min_dist, update identity and min_dist
        if dist < min_dist:
            min_dist = dist
            identity = name

    # Check whether the person is in the database
    if min_dist > 0.7:
        print("Sorry, you are not in the database")
    else:
        print("Name: " + str(identity) + "  distance: " + str(min_dist))

    return min_dist, identity

print("======================测试who_is_it===================")
who_is_it("images1/camera_0.jpg", database, FRmodel)

2. Neural Style Transfer

main.py

"""
    代码实现:
        1.实现神经风格转换算法
        2.用算法生成新的艺术图像
    之前都是优化一个成本函数来获得一组参数值,这里优化成本函数以获取像素值
"""
import imageio
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

import Deep_Learning.test4_4.nst_utils


# Load the VGG-19 model. This raises TypeError: conv2d_v2() got an unexpected keyword argument 'filter'; the fix is to change line 112 of nst_utils to tf.compat.v1.nn.conv2d
model = Deep_Learning.test4_4.nst_utils.load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
print(model)    # {'input': <tf.Variable 'Variable:0' shape=(1, 300, 400, 3) dtype=float32>...

# Compute the content cost
# content_image = scipy.misc.imread("image2/louvre.jpg")    # scipy.misc.imread is deprecated
content_image = imageio.imread("image2/louvre.jpg")
imshow(content_image)
plt.show()

def compute_content_cost(a_C, a_G):
    """
    Computes the content cost.
    :param a_C:     -tensor of shape (1, n_H, n_W, n_C), hidden-layer activations representing the content of image C
    :param a_G:     -tensor of shape (1, n_H, n_W, n_C), hidden-layer activations representing the content of image G
    :return: J_content  -real number
    """

    # Retrieve the dimensions of a_G
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll a_C and a_G from 3 dimensions to 2
    a_C_unrolled = tf.transpose(tf.reshape(a_C, [n_H * n_W, n_C]))
    a_G_unrolled = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    # Compute the content cost
    J_content = 1 / (4 * n_H * n_W * n_C) * tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled, a_G_unrolled)))

    return J_content

tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    a_C = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_content = compute_content_cost(a_C, a_G)
    print("J_content = " + str(J_content.eval()))   # J_content = 7.6410217
    test.close()
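# Cross-check of the content-cost formula in plain numpy (a sketch on fixed toy arrays):
check_aC = np.ones((1, 2, 2, 3))
check_aG = np.zeros((1, 2, 2, 3))
print(np.sum((check_aC - check_aG) ** 2) / (4 * 2 * 2 * 3))   # 12 / 48 = 0.25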


# Compute the style cost
style_image = imageio.imread("image2/monet_800600.jpg")
imshow(style_image)
plt.show()

def gram_matrix(A):
    """
    Style (Gram) matrix GA = A A.T
    :param A:   -matrix of shape (n_C, n_H*n_W)
    :return: GA -Gram matrix of A, shape (n_C, n_C)
    """
    GA = tf.matmul(A, tf.transpose(A))

    return GA

tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    A = tf.compat.v1.random_normal([3, 2 * 1], mean=1, stddev=4)
    GA = gram_matrix(A)
    print("GA = " + str(GA.eval()))
    test.close()
"""
    运行结果:
        GA = [[ 15.615461  12.248833 -29.87157 ]
         [ 12.248833  10.877857 -19.879116]
         [-29.87157  -19.879116  67.08007 ]]
"""


# Compute the style cost of a single layer
def compute_layer_style_cost(a_S, a_G):
    """
    :param a_S:     -tensor of shape (1, n_H, n_W, n_C), hidden-layer activations of the style image
    :param a_G:     -tensor of shape (1, n_H, n_W, n_C), hidden-layer activations of the generated image
    :return: J_style_layer  -the style cost
    """

    # Retrieve the dimensions of a_G
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Reshape the hidden-layer activations a_S and a_G into 2-D matrices
    a_S = tf.reshape(a_S, (n_H * n_W, n_C))
    a_G = tf.reshape(a_G, (n_H * n_W, n_C))

    # Compute the Gram matrices
    GS = gram_matrix(tf.transpose(a_S))
    GG = gram_matrix(tf.transpose(a_G))

    # Compute the loss
    J_style_layer = 1 / (4 * (n_C ** 2) * ((n_H * n_W) ** 2)) \
                    * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))

    return J_style_layer

tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    a_S = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_style_layer = compute_layer_style_cost(a_S, a_G)
    print("J_style_layer = " + str(J_style_layer.eval()))   # J_style_layer = 2.2849257
    test.close()


# Style layer weights, used to combine the style costs of different layers
STYLE_LAYERS = [('conv1_1', 0.2),
                ('conv2_1', 0.2),
                ('conv3_1', 0.2),
                ('conv4_1', 0.2),
                ('conv5_1', 0.2)]


# Define the overall (multi-layer) style cost
def compute_style_cost(model, STYLE_LAYERS):
    """
    Computes the overall style cost from several chosen layers.
    :param model:   -the TensorFlow model
    :param STYLE_LAYERS:    -list of (layer name, coefficient) pairs
    :return: J_style -tensor, the style cost
    """

    # Initialize
    J_style = 0

    for layer_name, coeff in STYLE_LAYERS:
        # Select the output tensor of the currently selected layer
        out = model[layer_name]
        # Set a_S to the hidden-layer activation of the selected layer by running the session on it
        a_S = sess.run(out)
        # Set a_G to the hidden-layer activation of the same layer. Here a_G references
        # model[layer_name] and has not been evaluated yet; the image G is assigned as
        # the model input later.
        a_G = out

        # Compute the style cost of the current layer
        J_style_layer = compute_layer_style_cost(a_S, a_G)

        # Add coeff * J_style_layer of this layer to the overall style cost
        J_style += coeff * J_style_layer

    # Note: the return must sit outside the for loop; in the original code it was
    # indented inside the loop, so only the first layer's cost was counted
    return J_style


# Define the total cost
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Computes the total cost function.
    :param J_content:   -the content cost
    :param J_style:     -the style cost
    :param alpha:       -hyperparameter, weight of the content cost
    :param beta:        -hyperparameter, weight of the style cost
    :return: J          -the total cost
    """
    J = alpha * J_content + beta * J_style

    return J

tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(3)
    # Note: np.random is not seeded here, so the printed value varies between runs
    J_content = np.random.randn()
    J_style = np.random.randn()
    J = total_cost(J_content, J_style)
    print("J = " + str(J))  # J = 34.76142525659208
    test.close()



# Putting it all together
tf.compat.v1.reset_default_graph()
# 1. Create an Interactive Session:
"""
    Unlike a regular session, an "Interactive Session" installs itself as the default
    session for building the graph. This lets you run variables without constantly
    referring to the session object, which simplifies the code.
"""
sess = tf.compat.v1.InteractiveSession()

# 2. Load the content image, reshape and normalize it
content_image = imageio.imread("image2/dks.png")
content_image = Deep_Learning.test4_4.nst_utils.reshape_and_normalize_image(content_image)

# 3. Load the style image, reshape and normalize it
style_image = imageio.imread("image2/xingkong.jpg")
style_image = Deep_Learning.test4_4.nst_utils.reshape_and_normalize_image(style_image)

# 4. Randomly initialize the image to be generated
"""
    We initialize the "generated" image as a noisy image created from the content image.
    By initializing its pixels to be mostly noise but still slightly correlated with the
    content image, the content of the "generated" image will match the "content" image faster.
"""
generated_image = Deep_Learning.test4_4.nst_utils.generate_noise_image(content_image)
imshow(generated_image[0])
plt.show()

# 5. Load the VGG-19 model
model = Deep_Learning.test4_4.nst_utils.load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

# 6. Build the TensorFlow graph
# 6.1 Run the content image and compute the content cost
# 6.1.1 Assign the content image as the input of the VGG model
sess.run(model['input'].assign(content_image))
# 6.1.2 Set a_C to the hidden-layer activations of layer "conv4_2"
out = model['conv4_2']
a_C = sess.run(out)
# 6.1.3 Set a_G to the tensor of hidden-layer activations of the same layer
a_G = out
J_content = compute_content_cost(a_C, a_G)
"""
    注意:在这里a_C是一个张量,还没有被赋值。
    当在model_nn()中运行Tensorflow graph时,它将在每次迭代中被评价和更新。
"""

# 6.2 Run the style image and compute the style cost
sess.run(model['input'].assign(style_image))
J_style = compute_style_cost(model, STYLE_LAYERS)

# 6.3 Compute the total cost
J = total_cost(J_content, J_style, 10, 40)

# 6.4 Define the Adam optimizer with learning rate 2.0
optimizer = tf.compat.v1.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)
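# Note: in load_vgg_model the VGG weights are constants and only model['input'] is a
# tf.Variable (see the print of model above), so minimizing J updates the pixels of
# the generated image rather than the network weights.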

# 7. Initialize the TensorFlow graph and run it for many iterations, updating the generated image at every step.
def model_nn(sess, input_image, num_iterations=500):
    """
    Initializes the variables of the TensorFlow graph, assigns the input image (the initial
    generated image) as the input of the VGG-19 model, and runs train_step repeatedly.
    :param sess:            -the session
    :param input_image:     -the input image
    :param num_iterations:  -number of iterations
    :return: generated_image
    """

    # Initialize the global variables
    sess.run(tf.compat.v1.global_variables_initializer())

    # Run the noisy input image (the initial generated image) through the model
    generated_image = sess.run(model["input"].assign(input_image))

    for i in range(num_iterations):
        # Run the session on train_step to minimize the total cost
        sess.run(train_step)
        # Compute the generated image by running the session on the current model["input"]
        generated_image = sess.run(model["input"])

        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration" + str(i) + ":")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))

            Deep_Learning.test4_4.nst_utils.save_image("output/" + str(i) + ".png", generated_image)

    Deep_Learning.test4_4.nst_utils.save_image('output/generated_image.jpg', generated_image)

    return generated_image


model_nn(sess, generated_image)


# To use your own images, just resize them to 400×300 and replace the files
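# A minimal resize sketch (assumes Pillow is installed; the file names below are examples):
#   from PIL import Image
#   Image.open("my_photo.jpg").resize((400, 300)).save("image2/dks.png")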

Output

Iteration20:
total cost = 945719.5
content cost = 12107.339
style cost = 20616.152
Iteration40:
total cost = 311538.5
content cost = 10928.709
style cost = 5056.2847
Iteration60:
total cost = 200105.4
content cost = 10215.002
style cost = 2448.885
...
total cost = 51185.676
content cost = 4928.8657
style cost = 47.425514

generated_image.jpg

