Selection process: I first tried BiFormer, but it blew up my GPU memory. Some searching confirmed that the bi-level routing attention used in BiFormer is inherently memory-hungry and computationally expensive, so I went with local window attention (LocalWindowAttention) instead.
So how do we add attention to YOLOv8? It takes the same number of steps as putting an elephant into a fridge.
Step 1: add the attention code. File location: nn/modules/conv.py (i.e. ultralytics/nn/modules/conv.py in the repo).
Add the following at the very top of the file:
from torch import Tensor
from typing import Tuple
from einops import rearrange  # install einops first if you don't have it (pip install einops)
import torch.nn.functional as F
import itertools
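A quick aside on why einops is here. This is a minimal sketch of my own, not code from the attention module itself (the tensor shape and the 7x7 window size are just assumptions for the demo): rearrange is what turns a feature map into per-window token batches, which is the core trick behind local window attention.

# Demo only: split a (B, C, H, W) feature map into non-overlapping 7x7 windows.
import torch
from einops import rearrange

x = torch.randn(1, 64, 28, 28)    # (B, C, H, W) feature map, shapes assumed for the demo
win = 7                           # window size, assumed 7 here
# H and W are split into a 4x4 grid of 7x7 windows; each window becomes its own batch entry
windows = rearrange(x, 'b c (nh wh) (nw ww) -> (b nh nw) c wh ww', wh=win, ww=win)
print(windows.shape)              # torch.Size([16, 64, 7, 7])

With that out of the way, back to the file edits.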
Then add the attention code at the very bottom of the file. The source is pasted below. Note: Conv2d_BN is also required.
class Conv2d_BN(torch.nn.Sequential):
    """Conv2d + BatchNorm2d pair that can later be fused into a single Conv2d."""
    def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1,
                 groups=1, bn_weight_init=1, resolution=-10000):
        super().__init__()
        self.add_module('c', torch.nn.Conv2d(
            a, b, ks, stride, pad, dilation, groups, bias=False))
        self.add_module('bn', torch.nn.BatchNorm2d(b))
        torch.nn.init.constant_(self.bn.weight, bn_weight_init)
        torch.nn.init.constant_(self.bn.bias, 0)

    @torch.no_grad()
    def switch_to_deploy(self):
        # Fold the BatchNorm statistics into the convolution weights/bias
        # and return an equivalent standalone Conv2d for deployment.
        c, bn = self._modules.values()
        w = bn.weight / (bn.running_var + bn.eps) ** 0.5
        w = c.weight * w[:, None, None, None]
        b = bn.bias - bn.running_mean * bn.weight / \
            (bn.running_var + bn.eps) ** 0.5
        m = torch.nn.Conv2d(w.size(1) * self.c.groups, w.size(0), w.shape[2:],
                            stride=self.c.stride, padding=self.c.padding,
                            dilation=self.c.dilation, groups=self.c.groups)
        m.weight.data.copy_(w)
        m.bias.data.copy_(b)
        return m
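Before pasting the next class, a quick check on what switch_to_deploy actually buys you. This is my own sketch (not part of the file you are editing): it verifies that the fused Conv2d returned by switch_to_deploy matches the original Conv2d + BN pair in eval mode.

# Sketch: verify that the fused conv is numerically equivalent to Conv2d + BN.
# Assumes the Conv2d_BN class above has already been defined in the same scope.
import torch

m = Conv2d_BN(16, 32, ks=3, stride=1, pad=1).eval()   # eval() so BN uses running stats
x = torch.randn(2, 16, 20, 20)

with torch.no_grad():
    y_ref = m(x)                    # Conv2d followed by BatchNorm2d
    fused = m.switch_to_deploy()    # single Conv2d with the BN folded in
    y_fused = fused(x)

print(torch.allclose(y_ref, y_fused, atol=1e-5))       # True

Now the attention class itself, which also goes at the bottom of conv.py: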
class CascadedGroupAttention(torch.nn.Module):
    r""" Cascaded Group Attention.

    Args:
        dim (int): Number of input channels.