import math

import torch
import torch.nn as nn
from torch.nn import functional as F


class CausalSelfAttention(nn.Module):
    """
    A vanilla multi-head masked self-attention layer with a projection at the end.
    It is possible to use torch.nn.MultiheadAttention here but I am including an
    explicit implementation here to show that there is nothing too scary here.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer("mask", torch.tril(torch.ones(config.block_size, config.block_size))
                                          .view(1, 1, config.block_size, config.block_size))
        self.n_head = config.n_head

    def forward(self, x, layer_past=None):
        B, T, C = x.size()
        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        k = self.key(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        q = self.query(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        v = self.value(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        # causal self-attention; self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        att = self.attn_drop(att)
        y = att @ v  # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-assemble all head outputs side by side
        # output projection
        y = self.resid_drop(self.proj(y))
        return y
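A minimal smoke test for the layer above, assuming only the config fields the class itself reads (n_embd, n_head, block_size, attn_pdrop, resid_pdrop); the SimpleNamespace config and the concrete sizes are illustrative, not from any particular training setup.

from types import SimpleNamespace

config = SimpleNamespace(n_embd=64, n_head=4, block_size=16,
                         attn_pdrop=0.1, resid_pdrop=0.1)
attn = CausalSelfAttention(config)
x = torch.randn(2, 16, 64)   # (batch, sequence length, embedding dim)
y = attn(x)
print(y.shape)               # torch.Size([2, 16, 64]), same shape as the input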
NMS
import numpy as np


def nms(dets, thresh):
    """Greedy non-maximum suppression on boxes given as [x1, y1, x2, y2, score]."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:  # boxes remaining
        i = order[0]
        keep.append(i)
        # coordinates of the intersection between the current highest-scoring box and the others
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        # area of the intersection
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        # IoU: intersection / (area1 + area2 - intersection)
        IOU = inter / (areas[i] + areas[order[1:]] - inter)
        # indices of the boxes whose overlap does not exceed the threshold
        left_index = np.where(IOU <= thresh)[0]
        # update order; these indices were computed against order[1:], so they are
        # smaller by 1 than the positions in order, and the 1 must be added back
        order = order[left_index + 1]
    return keep
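A quick, hypothetical check of the function above; the boxes and the 0.5 threshold are made up for illustration.

# Three boxes in [x1, y1, x2, y2, score] format: the first two overlap heavily,
# the third is far away and should be kept regardless.
dets = np.array([
    [100, 100, 210, 210, 0.95],
    [105, 105, 215, 215, 0.90],
    [300, 300, 400, 400, 0.80],
])
print(nms(dets, thresh=0.5))  # indices of the kept boxes, here [0, 2]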