本文是针对yolov5热力图可视化的实现过程,并指出其中需要更改的地方。更改后便可以即插即用,较为简单。
一、代码
源码地址:yolo-gradcam
Github上的代码已经复制在下方,如下所示。
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch, yaml, cv2, os, shutil
import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from models.yolo import Model
from utils.general import intersect_dicts
from utils.augmentations import letterbox
from utils.general import xywh2xyxy
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
class yolov5_heatmap:
    """Grad-CAM style heatmap visualisation for YOLOv5 detections.

    NOTE(review): this is the article's original listing; known issues
    (np.int removal, intersect_dicts import location, inplace ops) are
    addressed later in the article (sections 三 and 五).
    """

    def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
        # Build a fresh model from `cfg` and transfer the checkpoint weights into it.
        device = torch.device(device)
        ckpt = torch.load(weight)
        model_names = ckpt['model'].names
        csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
        model = Model(cfg, ch=3, nc=len(model_names)).to(device)
        csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor']) # intersect
        model.load_state_dict(csd, strict=False) # load
        model.eval()
        print(f'Transferred {len(csd)}/{len(model.state_dict())} items')
        # `layer` and `method` are evaluated from strings, e.g. 'model.model[-2]'
        # and 'XGradCAM'. eval() is only safe because params come from get_params().
        target_layers = [eval(layer)]
        method = eval(method)
        # NOTE(review): np.int was removed in NumPy 1.24+ (AttributeError);
        # np.int_ is the fix shown in section 三.
        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int)
        # Stash every local (weight, model, device, ...) as an instance attribute.
        self.__dict__.update(locals())

    def post_process(self, result):
        # result: raw YOLOv5 head output; [..., :4] are xywh boxes and
        # [..., 4:] are [objectness, class scores...]. Sort by objectness desc.
        logits_ = result[..., 4:]
        boxes_ = result[..., :4]
        sorted, indices = torch.sort(logits_[..., 0], descending=True)
        return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy()

    def draw_detections(self, box, color, name, img):
        # Draw one xyxy box and its label onto `img` (modified in place) and return it.
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2, lineType=cv2.LINE_AA)
        return img

    def __call__(self, img_path, save_path):
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)
        # img process: letterbox to model input size, BGR->RGB, scale to [0, 1]
        img = cv2.imread(img_path)
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)
        # init ActivationsAndGradients: hooks the target layer
        grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)
        # get ActivationsAndResult
        result = grads(tensor)
        activations = grads.activations[0].cpu().detach().numpy()
        # postprocess to yolo output (sorted by objectness, descending)
        post_result, post_boxes = self.post_process(result[0])
        # only the top `ratio` fraction of predictions gets a heatmap
        for i in trange(int(post_result.size(0) * self.ratio)):
            if post_result[i][0] < self.conf_threshold:
                break
            self.model.zero_grad()
            if self.backward_type == 'conf':
                post_result[i, 0].backward(retain_graph=True)
            else:
                # get max probability for this prediction
                score = post_result[i, 1:].max()
                score.backward(retain_graph=True)
            # process heatmap: CAM weights from the hooked gradients
            gradients = grads.gradients[0]
            b, k, u, v = gradients.size()
            # NOTE(review): .numpy() on a CUDA tensor raises — presumably needs
            # .cpu() first when device='cuda:0'; confirm before shipping.
            weights = self.method.get_cam_weights(self.method, None, None, None, activations, gradients.detach().numpy())
            weights = weights.reshape((b, k, 1, 1))
            saliency_map = np.sum(weights * activations, axis=1)
            saliency_map = np.squeeze(np.maximum(saliency_map, 0))
            saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            if (saliency_map_max - saliency_map_min) == 0:
                # flat map carries no information; skip this prediction
                continue
            saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)
            # add heatmap and box to image
            cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
            cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())], f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}', cam_image)
            cam_image = Image.fromarray(cam_image)
            cam_image.save(f'{save_path}/{i}.png')
def get_params():
    """Return the default yolov5_heatmap configuration as a keyword dict."""
    return dict(
        weight='runs/train/exp/weights/best.pt',
        cfg='models/yolov5m.yaml',
        device='cuda:0',
        method='XGradCAM',        # GradCAMPlusPlus, GradCAM, XGradCAM
        layer='model.model[-2]',
        backward_type='class',    # class or conf
        conf_threshold=0.6,       # 0.6
        ratio=0.02,               # 0.02-0.1
    )
if __name__ == '__main__':
    # Build the visualiser from the default params and run it on a single image;
    # one heatmap per retained prediction is written into the 'result' directory.
    model = yolov5_heatmap(**get_params())
    model(r'dataset\images\test\aircraft_1064.jpg', 'result')
二、实现步骤
2.1 代码复制
建立一个v5-heatmap.py文件将上述代码复制到工程文件中,如下所示
2.2 环境配置
安装pytorch_grad_cam库,直接pip即可,缺什么pip什么
pip install pytorch_grad_cam
2.3 参数修改
找到get_params(),对其中的参数进行更改
def get_params():
params = {
'weight': 'yolov5m.pt',
'cfg': 'ultralytics/models/yolov5m.yaml',
'device': 'cuda:0',
'method': 'GradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM
'layer': 'model.model[8]',
'backward_type': 'all', # class, box, all
'conf_threshold': 0.6, # 0.6
'ratio': 0.02 # 0.02-0.1
}
return params
get_params中的参数:
- weight:
模型权重文件,代码默认是yolov5m.pt
- cfg:
模型文件,代码默认是yolov5m.yaml,需要注意的是需要跟weight中的预训练文件的配置是一样的,不然会报错
- device:
选择使用GPU还是CPU
- method:
选择grad-cam方法,默认是GradCAM,这里是提供了几种,可能对效果有点不一样,大家大胆尝试。
- layer:
选择需要可视化的层数,只需要修改数字即可,比如想用第9层,也就是model.model[9]。
- backward_type:
反向传播的方式,可以是以conf的loss传播,也可以class的loss传播,一般选用all,效果比较好一点。
- conf_threshold:
置信度,默认是0.6。
- ratio:
默认是0.02,就是用来筛选置信度高的结果,低的就舍弃,0.02则是筛选置信度最高的前2%的图像来进行热力图。
2.4 图片路径修改
将这个代码中的图片路径进行更改,换成自己的图片路径,路径不能有中文!!!,否则会报错。
if __name__ == '__main__':
model = yolov5_heatmap(**get_params())
model(r'图片路径', 'result') #路径不能有中文!!!
2.5 代码运行
代码运行前首先要对问题进行排除,可以转到第三部分,修改完后直接运行,热力图就出炉了!
运行方法可以直接run也可以在终端中直接输入命令
python v5-heatmap.py
2.6 结果生成
由于上面保存路径设置为result,所以图片会在该文件夹中生成
三、问题bug解决
1.问题一
出现报错 ImportError: cannot import name 'intersect_dicts' from 'utils.general'
解决方法:
由于intersect_dicts移到torch_utils中,在general中找不到,所以将from utils.general import intersect_dicts改为
from utils.torch_utils import intersect_dicts
2.问题二
出现报错 AttributeError: module 'numpy' has no attribute 'int'.
解决方法:
之前有个修改bug的文章中给出了解决方法,此处再直接给大家指出。Numpy报错:AttributeError: module ‘numpy‘ has no attribute ‘int‘-CSDN博客
将下面这个代码
colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int)
改为
colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int_)
3.问题三
出现报错 RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:...
解决方法:
由于inplace设置的原因,所以找到models/yolo.py文件大概106行,或者Ctrl+F搜索 self.inplace = self.yaml.get('inplace', True),将这行代码
改为
self.inplace = self.yaml.get('inplace', False)
四、预测框去除
方法:
将第二个cam_image注释掉
如下所示
五、修改后代码
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch, yaml, cv2, os, shutil
import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from models.yolo import Model
from utils.torch_utils import intersect_dicts
from utils.augmentations import letterbox
from utils.general import xywh2xyxy
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
class yolov5_heatmap:
    """Grad-CAM style heatmap visualisation for YOLOv5 detections.

    Loads a YOLOv5 checkpoint into a freshly built model, hooks the requested
    layer, back-propagates either the objectness score ('conf') or the best
    class score for each high-confidence prediction, and saves one CAM overlay
    image per prediction.
    """

    def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
        """Build the model and resolve the CAM method / target layer.

        weight: path to a YOLOv5 .pt checkpoint.
        cfg: model yaml matching the checkpoint architecture.
        device: e.g. 'cuda:0' or 'cpu'.
        method: CAM class name ('GradCAMPlusPlus', 'GradCAM', 'XGradCAM').
        layer: expression selecting the layer to visualise, e.g. 'model.model[9]'.
        backward_type: 'conf' back-props objectness; anything else, the max class score.
        conf_threshold: minimum objectness for a prediction to be visualised.
        ratio: fraction of top-scoring predictions to consider.
        """
        device = torch.device(device)
        # map_location keeps the load on the requested device even if the
        # checkpoint was saved from a different device (e.g. cuda:1 -> cpu).
        ckpt = torch.load(weight, map_location=device)
        model_names = ckpt['model'].names
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        model = Model(cfg, ch=3, nc=len(model_names)).to(device)
        csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor'])  # intersect
        model.load_state_dict(csd, strict=False)  # load
        model.eval()
        print(f'Transferred {len(csd)}/{len(model.state_dict())} items')
        # NOTE: eval() on these config strings is only acceptable because they
        # come from get_params(); never feed untrusted input here.
        target_layers = [eval(layer)]
        method = eval(method)
        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int_)
        # Stash every local (weight, model, device, ...) as an instance attribute.
        self.__dict__.update(locals())

    def post_process(self, result):
        """Sort raw predictions by objectness, descending.

        Returns (scores, boxes): scores are [objectness, class scores...] rows
        and boxes are xyxy numpy coordinates, in descending-objectness order.
        """
        logits_ = result[..., 4:]
        boxes_ = result[..., :4]
        # only the ordering is needed; `_` avoids shadowing the builtin `sorted`
        _, indices = torch.sort(logits_[..., 0], descending=True)
        return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy()

    def draw_detections(self, box, color, name, img):
        """Draw one xyxy box and its label onto img (modified in place)."""
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2,
                    lineType=cv2.LINE_AA)
        return img

    def __call__(self, img_path, save_path):
        """Generate heatmaps for img_path into save_path (directory is recreated)."""
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)
        # img process: letterbox to model input size, BGR->RGB, scale to [0, 1]
        img = cv2.imread(img_path)
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)
        # init ActivationsAndGradients: hooks the target layer
        grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)
        # get ActivationsAndResult
        result = grads(tensor)
        activations = grads.activations[0].cpu().detach().numpy()
        # postprocess to yolo output (sorted by objectness, descending)
        post_result, post_boxes = self.post_process(result[0])
        # only the top `ratio` fraction of predictions gets a heatmap
        for i in trange(int(post_result.size(0) * self.ratio)):
            if post_result[i][0] < self.conf_threshold:
                break
            self.model.zero_grad()
            if self.backward_type == 'conf':
                post_result[i, 0].backward(retain_graph=True)
            else:
                # get max probability for this prediction
                score = post_result[i, 1:].max()
                score.backward(retain_graph=True)
            # process heatmap: CAM weights from the hooked gradients
            gradients = grads.gradients[0]
            b, k, u, v = gradients.size()
            # .cpu() is required before .numpy() when running on CUDA
            # (activations above already do this; gradients must match)
            weights = self.method.get_cam_weights(self.method, None, None, None, activations,
                                                  gradients.cpu().detach().numpy())
            weights = weights.reshape((b, k, 1, 1))
            saliency_map = np.sum(weights * activations, axis=1)
            saliency_map = np.squeeze(np.maximum(saliency_map, 0))
            saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            if (saliency_map_max - saliency_map_min) == 0:
                # flat map carries no information; skip this prediction
                continue
            saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)
            # add heatmap and box to image
            cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
            cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())],
                                             f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}',
                                             cam_image)  ## comment out to drop the prediction box ##
            cam_image = Image.fromarray(cam_image)
            cam_image.save(f'{save_path}/{i}.png')
def get_params():
    """Return the default yolov5_heatmap configuration as a keyword dict."""
    return dict(
        weight='yolov5m.pt',
        cfg='models/yolov5m.yaml',
        device='cuda:0',
        method='GradCAM',         # GradCAMPlusPlus, GradCAM, XGradCAM
        layer='model.model[9]',
        backward_type='class',    # class or conf
        conf_threshold=0.6,       # 0.6
        ratio=0.02,               # 0.02-0.1
    )
if __name__ == '__main__':
    # Build the visualiser from the default params and run it on a single image.
    model = yolov5_heatmap(**get_params())
    model(r'/data/images/bus.jpg', 'result')  # first arg: image to analyse; second arg: output directory
注:有任何问题欢迎评论区交流讨论或者私信!