1. darknet53结构分解
1. DBL结构:
class DBL(nn.Module):
    """Darknet basic unit: Conv2d -> BatchNorm2d -> LeakyReLU.

    Args:
        inchannels: number of input channels of the convolution.
        outchannels: number of output channels of the convolution.
        k: convolution kernel size.
        s: convolution stride.
        p: convolution zero-padding.
        negative_slope: LeakyReLU slope for negative inputs. Defaults to 0.1,
            the slope of Darknet's ``leaky`` activation; PyTorch's own default
            (0.01) does not match the reference network.
    """

    def __init__(self, inchannels, outchannels, k, s, p, negative_slope=0.1):
        super(DBL, self).__init__()
        self.l = nn.Conv2d(inchannels, outchannels, k, s, p)
        self.bn = nn.BatchNorm2d(outchannels)
        self.negative_slope = negative_slope

    def forward(self, x):
        """Return ``leaky_relu(bn(conv(x)))``."""
        out = self.l(x)
        return F.leaky_relu(self.bn(out), self.negative_slope)
2. darknet53中残差网络的结构:
class ResBlock(nn.Module):
    """Darknet-53 residual unit: two conv-BN-leaky layers plus a skip connection.

    Args:
        inchannels: two-element sequence; input channels of the first and
            second convolution.
        outchannels: two-element sequence; output channels of the first and
            second convolution. ``outchannels[1]`` must equal the channel
            count of the block input so that the skip addition is valid.
        k: two-element sequence of kernel sizes.
        p: two-element sequence of paddings.
        negative_slope: LeakyReLU slope for negative inputs. Defaults to 0.1,
            the slope of Darknet's ``leaky`` activation.
    """

    def __init__(self, inchannels, outchannels, k, p, negative_slope=0.1):
        super(ResBlock, self).__init__()
        self.l1 = nn.Conv2d(
            inchannels[0], outchannels[0], kernel_size=k[0], stride=1, padding=p[0])
        self.bn1 = nn.BatchNorm2d(outchannels[0])
        self.l2 = nn.Conv2d(
            inchannels[1], outchannels[1], kernel_size=k[1], stride=1, padding=p[1])
        self.bn2 = nn.BatchNorm2d(outchannels[1])
        self.negative_slope = negative_slope

    def forward(self, x):
        """Return ``x + branch(x)`` where the branch is two conv-BN-leaky layers."""
        out = F.leaky_relu(self.bn1(self.l1(x)), self.negative_slope)
        out = F.leaky_relu(self.bn2(self.l2(out)), self.negative_slope)
        # Darknet's shortcut layer is linear: the skip connection is added
        # AFTER the second activation and no further non-linearity follows.
        return x + out
3. darknet53中,第一层卷积网络的结构:
class ConvBlock(nn.Module):
    """First Darknet-53 layer: 3x3 stride-1 Conv2d -> BatchNorm2d -> LeakyReLU.

    Args:
        inchannels: number of input channels.
        outchannels: number of output channels.
        negative_slope: LeakyReLU slope for negative inputs. Defaults to 0.1,
            the slope of Darknet's ``leaky`` activation (PyTorch's default of
            0.01 does not match the reference network).
    """

    def __init__(self, inchannels, outchannels, negative_slope=0.1):
        super(ConvBlock, self).__init__()
        self.l = nn.Conv2d(inchannels, outchannels,
                           kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(outchannels)
        self.negative_slope = negative_slope

    def forward(self, x):
        """Return ``leaky_relu(bn(conv(x)))``; spatial size is preserved."""
        out = self.l(x)
        return F.leaky_relu(self.bn(out), self.negative_slope)
4. darknet53忘记写递归并且懒得写递归的懒人版本完整结构:
class Darknet53(nn.Module):
    """Darknet-53 backbone together with the three YOLOv3 detection heads.

    Args:
        num_classes: number of object classes to predict (default 80).
        num_anchors: number of anchors predicted per grid cell at each scale
            (default 3).

    Every head outputs ``num_anchors * (num_classes + 5)`` channels
    (4 box offsets + 1 objectness + class scores per anchor), i.e. 255 with
    the defaults. ``forward`` returns the heads ordered from the coarsest
    grid to the finest one (stride 32, 16, 8 relative to the input).
    """

    def __init__(self, num_classes=80, num_anchors=3):
        super(Darknet53, self).__init__()
        head_channels = num_anchors * (num_classes + 5)

        # Backbone up to the stride-8 feature map (256 channels).
        # Every down-sampling layer is a full conv-BN-leaky unit, as in the
        # reference network, not a bare convolution.
        self.l1 = nn.Sequential(
            ConvBlock(3, 32),
            DBL(32, 64, 3, 2, 1),
            *self._res_stage(64, 1),
            DBL(64, 128, 3, 2, 1),
            *self._res_stage(128, 2),
            DBL(128, 256, 3, 2, 1),
            *self._res_stage(256, 8),
        )
        # Stride-16 feature map (512 channels).
        self.l2 = nn.Sequential(
            DBL(256, 512, 3, 2, 1),
            *self._res_stage(512, 8),
        )
        # Stride-32 feature map (1024 channels).
        self.l3 = nn.Sequential(
            DBL(512, 1024, 3, 2, 1),
            *self._res_stage(1024, 4),
        )
        # feature1: head on the coarsest grid
        self.l4 = nn.Sequential(
            DBL(1024, 512, 1, 1, 0),
            DBL(512, 1024, 3, 1, 1),
            DBL(1024, 512, 1, 1, 0),
            DBL(512, 1024, 3, 1, 1),
            DBL(1024, 512, 1, 1, 0),
        )
        self.l6 = nn.Sequential(
            DBL(512, 1024, 3, 1, 1),
            nn.Conv2d(1024, head_channels, 1, 1, 0),
        )
        # feature2: upsample x2 and fuse with the stride-16 backbone output
        self.l7 = nn.Sequential(
            DBL(512, 256, 1, 1, 0),
            nn.Upsample(scale_factor=2, mode='nearest')
        )
        self.l8 = nn.Sequential(
            DBL(768, 256, 1, 1, 0),  # 768 = 256 (upsampled) + 512 (skip)
            DBL(256, 512, 3, 1, 1),
            DBL(512, 256, 1, 1, 0),
            DBL(256, 512, 3, 1, 1),
            DBL(512, 256, 1, 1, 0),
        )
        self.l9 = nn.Sequential(
            DBL(256, 512, 3, 1, 1),
            nn.Conv2d(512, head_channels, 1, 1, 0),
        )
        # feature3: upsample x2 and fuse with the stride-8 backbone output
        self.l10 = nn.Sequential(
            DBL(256, 128, 1, 1, 0),
            nn.Upsample(scale_factor=2, mode='nearest'),
        )
        self.l11 = nn.Sequential(
            DBL(384, 128, 1, 1, 0),  # 384 = 128 (upsampled) + 256 (skip)
            DBL(128, 256, 3, 1, 1),
            DBL(256, 128, 1, 1, 0),
            DBL(128, 256, 3, 1, 1),
            DBL(256, 128, 1, 1, 0),
            DBL(128, 256, 3, 1, 1),
            nn.Conv2d(256, head_channels, 1, 1, 0),
        )

    @staticmethod
    def _res_stage(channels, count):
        """Build ``count`` residual units (1x1 squeeze to channels/2, 3x3 expand)."""
        half = channels // 2
        return [
            ResBlock([channels, half], [half, channels], [1, 3], [0, 1])
            for _ in range(count)
        ]

    def forward(self, x):
        """Return the three detection maps ``(f1, f2, f3)``, coarsest first."""
        out1 = self.l1(x)      # stride 8, 256 channels
        out2 = self.l2(out1)   # stride 16, 512 channels
        out3 = self.l3(out2)   # stride 32, 1024 channels
        out4 = self.l4(out3)
        # f1
        f1 = self.l6(out4)
        # f2
        out5 = self.l7(out4)
        out6 = torch.cat([out5, out2], dim=1)
        out7 = self.l8(out6)
        f2 = self.l9(out7)
        # f3
        out8 = self.l10(out7)
        f3 = torch.cat([out8, out1], dim=1)
        f3 = self.l11(f3)
        return f1, f2, f3
之后我们随便找一张图,看三个stage的输出是否为我们所需要的结构:
# Load a test image. plt.imread's second positional argument is the file
# *format*, not a colour flag, so nothing is passed and the format is taken
# from the file extension.
pic = plt.imread("D:\\Users\\11234\\Desktop\\testpic\\3.jpg")
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((416, 416)),  # resize to 416 x 416
])
pic = trans(pic).unsqueeze(0)  # add the batch dimension
net = Darknet53()
# This is only a shape check: use inference mode so BatchNorm does not update
# its running statistics, and skip building the autograd graph.
net.eval()
with torch.no_grad():
    f1, f2, f3 = net(pic)
print(f1.shape, f2.shape, f3.shape)
结果可以看到,三个stage的特征图大小分别为13×13、26×26、52×52,通道数都是255。255=num_box*(num_class+5),其中num_box为预先设置的每个像素的锚框数量(这里为3),num_class为可预测的类别数,取决于你的数据集,5对应4个边界框坐标加1个置信度。例如,coco数据集的类别数为80,此时num_class=80,3*(80+5)=255。
(pytorch) PS E:\CODE_CSF\PytorchFile> python 03Darknet53.py
torch.Size([1, 255, 13, 13]) torch.Size([1, 255, 26, 26]) torch.Size([1, 255, 52, 52])
2. Anchor结构分解
1. 画框 Rectangle函数需要给定框的一个顶点坐标(xy)以及框的宽和高,下面这个函数将框左上顶点坐标与右下顶点坐标四个参数转换为Rectangle所需的参数。
def Box2Rectpp(bbox, color):
    """Convert a corner-format box into an unfilled matplotlib Rectangle.

    Args:
        bbox: indexable of four numbers ``(x1, y1, x2, y2)``; the first pair is
            used as the rectangle anchor, the second pair as the opposite
            corner.
        color: edge colour passed to matplotlib.

    Returns:
        A ``plt.Rectangle`` anchored at ``(x1, y1)`` with width ``x2 - x1`` and
        height ``y2 - y1``, drawn as a 1-pt outline.
    """
    x_min, y_min = bbox[0], bbox[1]
    x_max, y_max = bbox[2], bbox[3]
    return plt.Rectangle(
        xy=(x_min, y_min),
        width=(x_max - x_min),
        height=(y_max - y_min),
        fill=False,
        edgecolor=color,
        linewidth=1,
    )
2. 为每个pixel赋予anchor,每个pixel的锚框数量为len(s)+len(r)-1。此函数需要输入featuremap的尺寸(如:13,26,52),之后会在featuremap对应的pixel上计算锚框的位置坐标并输出。输出格式为(锚框总数*4),每一行是一个锚框的左上、右下顶点坐标。
def MultiBox_per_pixel(data, s, r, feature):
    """Generate anchor boxes for every cell of a ``feature`` x ``feature`` grid.

    Anchors are emitted left-to-right first, then top-to-bottom; within one
    cell they follow the order of the generated box sizes.

    Args:
        data: tensor whose ``shape[1:3]`` is read as ``(height, width)`` --
            presumably a ``(C, H, W)`` image; confirm against the caller.
        s: list of scales (fractions of the image size).
        r: list of aspect ratios.
        feature: number of grid cells along each side (e.g. 13, 26 or 52).

    Returns:
        Tensor of shape ``(feature * feature * num, 4)`` with rows
        ``(x1, y1, x2, y2)`` (top-left and bottom-right corners), where
        ``num = len(s) + len(r) - 1``.
    """
    h, w = data.shape[1:3]
    scales = torch.tensor(s)
    sqrt_ratios = torch.sqrt(torch.tensor(r))

    # First scale paired with every ratio, then the remaining scales paired
    # with the first ratio; sizes are truncated to whole pixels.
    box_w = torch.cat(
        (w * scales[0] * sqrt_ratios, w * scales[1:] * sqrt_ratios[0]), 0).long()
    box_h = torch.cat(
        (h * scales[0] / sqrt_ratios, h * scales[1:] / sqrt_ratios[0]), 0).long()
    num = box_w.shape[0]

    # Pixel size of one grid cell and the centre coordinate of each cell.
    cell_w, cell_h = int(w / feature), int(h / feature)
    cells = torch.arange(0, feature).long()
    center_x = cells * cell_w + cell_w // 2
    center_y = cells * cell_h + cell_h // 2

    # Broadcast centres against half-extents: each result is (feature, num).
    # Plain broadcasting replaces torch.meshgrid, whose call without an
    # explicit `indexing` argument warns on recent PyTorch releases.
    half_w = box_w / 2
    half_h = box_h / 2
    x_min = center_x[:, None] - half_w[None, :]
    x_max = center_x[:, None] + half_w[None, :]
    y_min = center_y[:, None] - half_h[None, :]
    y_max = center_y[:, None] + half_h[None, :]

    # Lay out as (row, column, anchor): x varies with the column index,
    # y with the row index, so flattening gives left-to-right, top-to-bottom.
    layout = (feature, feature, num)
    outbox = torch.stack(
        (
            x_min[None, :, :].expand(layout),
            y_min[:, None, :].expand(layout),
            x_max[None, :, :].expand(layout),
            y_max[:, None, :].expand(layout),
        ),
        dim=-1,
    ).reshape(-1, 4)
    # Anchor corners: left x, top y, right x, bottom y
    return outbox
3. 计算IoU。这部分较为简单
def IoU(box1, box2):
    """Intersection-over-union between a batch of boxes and one reference box.

    Args:
        box1: tensor of shape ``(N, 4)`` with rows ``(x1, y1, x2, y2)``
            (top-left and bottom-right corners).
        box2: tensor of shape ``(4,)`` in the same corner format.

    Returns:
        Tensor of shape ``(N,)`` holding the IoU of every row of ``box1``
        with ``box2``; 0 for boxes that do not overlap.
    """
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Corners of the overlap rectangle.
    leftx = torch.max(box1[:, 0], box2[0])
    lefty = torch.max(box1[:, 1], box2[1])
    rightx = torch.min(box1[:, 2], box2[2])
    righty = torch.min(box1[:, 3], box2[3])

    # Clamp each extent separately. Multiplying the raw differences would give
    # a positive "intersection" for boxes that are disjoint on BOTH axes
    # (negative * negative), so the product alone cannot be thresholded.
    inter_w = (rightx - leftx).clamp(min=0)
    inter_h = (righty - lefty).clamp(min=0)
    inter = inter_w * inter_h

    union = area1 + area2 - inter
    return inter / union
4. 保留IoU较大的框。需要设置阈值,IoU大于等于阈值的锚框予以保留,小于阈值的IoU置为0(当作背景)。
def assign_anchor_to_box(IoU_Threshold, bbox, anchor, mark):
    """Score every anchor against every ground-truth box and apply a threshold.

    Args:
        IoU_Threshold: IoU values below this are replaced by 0 (background).
        bbox: tensor of ground-truth boxes, one ``(x1, y1, x2, y2)`` row each.
        anchor: tensor of anchors; it is handed to ``IoU`` unchanged, which
            indexes it as a 2-D ``(N, 4)`` corner-format tensor.
        mark: not used by this function.

    Returns:
        Tuple ``(max_index, thresholded)``: ``max_index`` holds, for each
        ground-truth box, the index of the anchor with the highest
        (un-thresholded) IoU; ``thresholded`` is the ``(N, num_boxes)`` IoU
        table with entries below the threshold zeroed.
    """
    iou_table = torch.empty(anchor.shape[0], bbox.shape[0])
    # One column per ground-truth box.
    for column, gt_box in enumerate(bbox):
        iou_table[:, column] = IoU(anchor, gt_box)

    background = torch.zeros_like(iou_table)
    thresholded = torch.where(iou_table < IoU_Threshold, background, iou_table)

    # Best-matching anchor for each ground-truth box.
    best_anchor = iou_table.argmax(dim=0)
    return best_anchor, thresholded
3. 数据集整理
我们要做的是手敲yolov3并且训练自己的数据集,所以我们不用诸如coco, voc2012这些数据集。我们需要自己建立一个数据集。
未完待续。。。。。写不动了,明天补充