Grad-CAM Class Activation Visualization
How to obtain a class activation heatmap for an image classification model.
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Display
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.cm as cm
Configurable parameters
You can change these to use a different model (see the sketch after the configuration block below).
To find the value for last_conv_layer_name, call model.summary() and look up the names of all layers in the model.
model_builder = keras.applications.xception.Xception
img_size = (299, 299)
preprocess_input = keras.applications.xception.preprocess_input
decode_predictions = keras.applications.xception.decode_predictions
last_conv_layer_name = "block14_sepconv2_act"
# The local path to our target image
img_path = keras.utils.get_file(
"african_elephant.jpg", "https://i.imgur.com/Bvro0YD.png"
)
display(Image(img_path))
The Grad-CAM algorithm
def get_img_array(img_path, size):
    # `img` is a PIL image of size 299x299
    img = keras.preprocessing.image.load_img(img_path, target_size=size)
    # `array` is a float32 Numpy array of shape (299, 299, 3)
    array = keras.preprocessing.image.img_to_array(img)
    # We add a dimension to transform our array into a "batch"
    # of size (1, 299, 299, 3)
    array = np.expand_dims(array, axis=0)
    return array
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    # First, we create a model that maps the input image to the activations
    # of the last conv layer as well as the output predictions
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
    )
    # Then, we compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        # If pred_index is not given, use the index of the largest value in preds[0]
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]
    # This is the gradient of the output neuron (top predicted or chosen)
    # with regard to the output feature map of the last conv layer
    # grads.shape == (1, 10, 10, 2048)
    grads = tape.gradient(class_channel, last_conv_layer_output)
    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    # pooled_grads is a 1-D vector of shape (2048,)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    # then sum all the channels to obtain the heatmap class activation
    # last_conv_layer_output[0] is the 3-D feature map of shape (10, 10, 2048);
    # @ is matrix multiplication (a dot product over the channel axis)
    last_conv_layer_output = last_conv_layer_output[0]
    # heatmap: (10, 10, 2048) @ (2048, 1) -> (10, 10, 1)
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    # tf.squeeze removes the size-1 dimension, leaving (10, 10)
    heatmap = tf.squeeze(heatmap)
    # For visualization purpose, we will also normalize the heatmap between 0 & 1
    # tf.maximum(heatmap, 0) acts like ReLU (keeps only values >= 0); dividing by
    # the maximum then normalizes the heatmap to the range 0-1
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()
Let's test-drive it
img_array = preprocess_input(get_img_array(img_path, size=img_size))
# Make model
model = model_builder(weights="imagenet")
# Remove last layer's softmax
model.layers[-1].activation = None
# Print what the top predicted class is
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=1)[0])#这些地方所加的0皆是batch中第一个sample的意思
# Generate class activation heatmap
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)
# Display heatmap
plt.matshow(heatmap)
plt.show()
Predicted: [('n02504458', 'African_elephant', 9.862388)]
Create a superimposed visualization
def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    # Load the original image
    img = keras.preprocessing.image.load_img(img_path)
    img = keras.preprocessing.image.img_to_array(img)
    # Rescale heatmap to a range 0-255
    heatmap = np.uint8(255 * heatmap)
    # Use jet colormap to colorize heatmap
    jet = cm.get_cmap("jet")
    # Use RGB values of the colormap
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]
    # Create an image with RGB colorized heatmap
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)
    # Superimpose the heatmap on the original image
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)
    # Save the superimposed image
    superimposed_img.save(cam_path)
    # Display Grad CAM
    display(Image(cam_path))
save_and_display_gradcam(img_path, heatmap)
Let's try another image
We will see how Grad-CAM explains the model's output for a multi-label image. Let's try an image that combines a cat and a dog, and see how Grad-CAM behaves.
img_path = keras.utils.get_file(
"cat_and_dog.jpg",
"https://storage.googleapis.com/petbacker/images/blog/2017/dog-and-cat-cover.jpg",
)
display(Image(img_path))
# Prepare image
img_array = preprocess_input(get_img_array(img_path, size=img_size))
# Print what the two top predicted classes are
preds = model.predict(img_array)
print("Predicted:", decode_predictions(preds, top=2)[0])
Predicted: [('n02112137', 'chow', 4.611241), ('n02124075', 'Egyptian_cat', 4.3817368)]
We generate a class activation heatmap for "chow", whose class index is 260
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=260)
save_and_display_gradcam(img_path, heatmap)
We generate a class activation heatmap for "Egyptian cat", whose class index is 285
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=285)
save_and_display_gradcam(img_path, heatmap)
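If you are wondering where the indices 260 and 285 come from, they are positions in the 1000-way ImageNet prediction vector. A minimal sketch (reusing the preds computed just above) to look them up:
# Indices of the two highest-scoring classes, printed next to the decoded labels
top_indices = np.argsort(preds[0])[::-1][:2]
print(top_indices)                           # e.g. [260 285]
print(decode_predictions(preds, top=2)[0])   # the matching labels and scores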
Summary
Use model.summary() to see the names of the model's layers.
When copying a layer name from the summary output, be careful not to include a trailing space.
You can also get the name of any individual layer with an expression like model.layers[-2].name.
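A sketch (not from the original article, using the model built above) of locating the last conv-style layer programmatically instead of reading model.summary() by eye:
# Walk the layers from the end and stop at the first one whose output is a
# 4-D feature map (batch, height, width, channels)
for layer in reversed(model.layers):
    if len(layer.output.shape) == 4:
        print("last conv-like layer:", layer.name)
        break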
For the image you want to predict on, call image.load_img(),
then img_to_array() to convert it to a NumPy array,
then np.expand_dims(x, axis=0) to add the batch dimension,
and finally preprocess it
with preprocess_input(x) and feed it to preds = model.predict(x).
np.argmax(preds[0]) gives the index of the highest score, e.g. 386.
Then model.output[:, 386] is the entry of the prediction vector corresponding to that class.
model.get_layer('block14_sepconv2') gives you a particular conv layer (and thus its output feature map).
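A condensed sketch of the pipeline described above (the image path is hypothetical):
img = keras.preprocessing.image.load_img("some_image.jpg", target_size=img_size)
x = keras.preprocessing.image.img_to_array(img)
x = np.expand_dims(x, axis=0)       # shape (1, 299, 299, 3)
x = preprocess_input(x)
preds = model.predict(x)
top_class = np.argmax(preds[0])     # e.g. 386 for African_elephant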
Execution logic of the code in this article
Define the gradient model; preds is its prediction output.
class_channel is the prediction entry for the chosen class.
grad_model = tf.keras.models.Model(
    [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
)
with tf.GradientTape() as tape:
    last_conv_layer_output, preds = grad_model(img_array)
    if pred_index is None:
        pred_index = tf.argmax(preds[0])
    class_channel = preds[:, pred_index]
grads = tape.gradient(class_channel, last_conv_layer_output)
grads is the gradient of the output with respect to the last conv layer, and its shape matches that layer's output:
grads.shape == (1, 10, 10, 2048)
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
This gives pooled_grads, a 1-D vector of shape (2048,).
These values are the weights of the conv layer's channels, i.e. how important each channel is.
After pooled_grads[..., tf.newaxis], pooled_grads becomes (2048, 1).
last_conv_layer_output is (1, 10, 10, 2048).
last_conv_layer_output = last_conv_layer_output[0]
heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
heatmap = tf.squeeze(heatmap)
At this point heatmap has become a 10x10 matrix.
Normalize heatmap to the range 0-1:
# For visualization purpose, we will also normalize the heatmap between 0 & 1
heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
return heatmap.numpy()
~~One difference in older TF1 Grad-CAM implementations is that after computing the gradients grads with respect to the conv layer, they did not immediately pool the grads,
i.e. they did not execute pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)),
but instead multiplied the gradients and the conv layer element-wise,
because grads really is a per-element weight,
as shown below.~~
The TF1 code
# K.gradients returns a list; [0] extracts the gradient tensor
grads = K.gradients(output, last_layer.output)[0]
# pooled_grads is 1-D, i.e. one weight per channel
pooled_grads = K.mean(grads, axis=(0, 1, 2))  # average each gradient feature map,
# returning a tensor whose size is the number of channels
iterate = K.function([model.input], [pooled_grads, last_layer.output[0]])
pooled_grads_value, conv_layer_output_value = iterate([img])
# for channels-last data, the channel count is shape[-1]
for i in range(pooled_grads.shape[-1]):
    conv_layer_output_value[:, :, i] *= pooled_grads_value[i]
The final for loop applies the gradient weight to each channel; together with the channel-wise mean that follows it in the TF1 version, this is equivalent (up to a constant factor that the later normalization removes) to the TF2 expression
heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
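A quick NumPy check of this equivalence (a sketch with random data, not from the original article):
# TF1 style: scale every channel by its weight, then average over channels;
# TF2 style: matrix-multiply by the (2048, 1) weight vector and squeeze.
conv_out = np.random.rand(10, 10, 2048).astype("float32")
weights = np.random.rand(2048).astype("float32")
cam_tf1 = (conv_out * weights).mean(axis=-1)            # shape (10, 10)
cam_tf2 = (conv_out @ weights[..., None]).squeeze(-1)   # shape (10, 10)
# Identical up to the constant factor 1/2048, which normalization removes
print(np.allclose(cam_tf1 * 2048, cam_tf2, rtol=1e-3))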
What you need to prepare is the name of the last conv layer and the prediction entry for the target class.
The channel dimension of the last conv layer can be read programmatically, e.g. via layer.output.shape[-1].
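For example, a minimal sketch of reading that channel count (2048 for Xception):
n_channels = model.get_layer(last_conv_layer_name).output.shape[-1]
print(n_channels)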
Transfer learning and class activation maps
During transfer learning, the last conv layer is usually rather small. For example, the last layer of an EfficientNet backbone with no_top is 7×7×1280. After running Grad-CAM, you first differentiate with respect to this conv layer and get grads of shape 1×7×7×1280.
Global pooling then yields 1×1280 weight values.
Finally, multiplying these with the conv layer output gives a 7×7 heatmap.
The heatmap is normalized to 0-1, and when it is superimposed it is scaled up to the size of the original image.
As a result, the class activation map is often not very accurate in this setting.
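A sketch of how the pieces above fit together for a transfer-learning model (the EfficientNetB0 backbone, the layer name "top_activation", the 10-class head, and img_array_224 are all my assumptions for illustration; check the real layer name with model.summary()):
# Build the head with the functional API on base.output so that the backbone
# layers stay visible to model.get_layer()
base = keras.applications.EfficientNetB0(include_top=False, input_shape=(224, 224, 3))
x = keras.layers.GlobalAveragePooling2D()(base.output)
outputs = keras.layers.Dense(10)(x)  # hypothetical 10-class head, no softmax
tl_model = keras.Model(base.input, outputs)
# The last conv feature map is only 7x7 here, so the upscaled heatmap is coarse
tl_heatmap = make_gradcam_heatmap(
    img_array_224,                          # hypothetical (1, 224, 224, 3) batch
    tl_model,
    last_conv_layer_name="top_activation",  # assumed name; verify with summary()
)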