pytorch版yolov3源码解读

前期准备

在学习过yolov3的相关原理后,感觉距离实践仍有一定差距,于是在github上找到了一版源码。本文对这份源码进行中文注释,以求更加深入地了解实现过程。
github的地址为

https://github.com/ayooshkathuria/pytorch-yolo-v3

这份代码目的是为了在pytorch框架下,使用原生的配置文件、权重文件,对获取到的新数据进行检测。
原作者提供了一个教程,链接如下

https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/

相对应的中文教程有两部分

https://www.jiqizhixin.com/articles/2018-04-23-3
https://www.jiqizhixin.com/articles/042602?from=synced&keyword=%E4%BB%8E%E9%9B%B6%E5%BC%80%E5%A7%8BPyTorch%E9%A1%B9%E7%9B%AE%EF%BC%9AYOLO%20v3%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B%E5%AE%9E%E7%8E%B0

下面进行中文注释

darknet.py

parse_cfg

首先对配置文件进行了解析

def parse_cfg(cfgfile):
    """Parse a Darknet .cfg file into a list of section dicts.

    Each section of the cfg (a ``[name]`` header followed by
    ``key=value`` lines) becomes one dict: the "type" key holds the
    section name, the remaining keys are that section's options
    (values kept as strings).

    Args:
        cfgfile: path to the Darknet configuration file.

    Returns:
        list[dict]: one dict per section, in file order.
    """
    # Context manager guarantees the handle is closed (the original
    # opened the file and never closed it).
    with open(cfgfile, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if len(x) > 0]   # drop empty lines
    lines = [x for x in lines if x[0] != '#']  # drop comment lines
    lines = [x.strip() for x in lines]         # trim surrounding whitespace

    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":              # '[' opens a new section
            if len(block) != 0:         # flush the previous section first
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()   # section name without brackets
        else:
            key, value = line.split("=")          # "key = value" option line
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)                # flush the final section
    return blocks

create_modules

在创建模型时,要将配置文件中的信息转化为module信息和网络信息。
对于前传过程可以确定的层,如“convolutional”层(其中可能包含“Conv2d”、“BatchNorm2d”、“LeakyReLU”),可以用nn.Sequential进行包装;nn.Sequential被调用时会依次自动执行内部各层的forward()方法。对于前传方法不能预先确定的层,如“route”、“shortcut”,可以显式地用空层占位,具体实现放在class Darknet中。

def create_modules(blocks):
    """Build an nn.ModuleList from the block dicts parsed by parse_cfg.

    blocks[0] must be the [net] section (global network options); each
    following block becomes one nn.Sequential appended in order, so
    module_list[i] corresponds to blocks[1:][i].

    Layers whose forward pass cannot be expressed ahead of time
    ("route", "shortcut") are registered as EmptyLayer placeholders;
    their real computation is implemented in Darknet.forward.

    Args:
        blocks: list of section dicts from parse_cfg.

    Returns:
        tuple: (net_info, module_list) -- the [net] dict and the
        nn.ModuleList of per-layer nn.Sequential modules.
    """
    net_info = blocks[0]             # global network options ([net] section)
    module_list = nn.ModuleList()
    index = 0                        # index of the layer being built ([net] excluded)
    prev_filters = 3                 # channels feeding the current layer (RGB input)
    output_filters = []              # output channel count of every layer built so far

    for x in blocks:
        module = nn.Sequential()     # wrap each block's sub-layers together
        if x["type"] == "net":       # [net] carries options, not a layer
            continue

        if x["type"] == "convolutional":
            activation = x["activation"]
            # A conv followed by batch norm needs no bias: BN's own
            # shift parameter makes the conv bias redundant.
            # (Membership test replaces the original bare except.)
            if "batch_normalize" in x:
                batch_normalize = int(x["batch_normalize"])
                bias = False
            else:
                batch_normalize = 0
                bias = True

            filters = int(x["filters"])      # number of kernels == output channels
            padding = int(x["pad"])          # flag: pad to preserve spatial size
            kernel_size = int(x["size"])
            stride = int(x["stride"])

            # Darknet's "pad=1" means SAME-style padding of (k-1)//2.
            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)

            if batch_normalize:
                # BN is parameterized by the conv's output channel count.
                module.add_module("batch_norm_{0}".format(index), nn.BatchNorm2d(filters))

            # YOLO uses either a linear (identity) or leaky-ReLU activation.
            if activation == "leaky":
                module.add_module("leaky_{0}".format(index), nn.LeakyReLU(0.1, inplace=True))

        elif x["type"] == "upsample":
            stride = int(x["stride"])
            # Honor the cfg's stride instead of hard-coding 2 (standard
            # yolov3 cfgs always use stride=2, so this is backward-compatible).
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_{}".format(index), upsample)

        elif x["type"] == "route":
            # Route concatenates one or two earlier layer outputs along
            # the channel dimension; the concat itself happens in
            # Darknet.forward, so only a placeholder is registered here.
            x["layers"] = x["layers"].split(',')
            start = int(x["layers"][0])
            # Optional second layer; 0 means "only one layer routed".
            end = int(x["layers"][1]) if len(x["layers"]) > 1 else 0
            # Convert absolute layer indices to offsets relative to here.
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            module.add_module("route_{0}".format(index), EmptyLayer())
            if end < 0:
                # Two routed layers: output channels add up.
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        elif x["type"] == "shortcut":
            # Element-wise residual addition; resolved in Darknet.forward.
            from_ = int(x["from"])
            module.add_module("shortcut_{}".format(index), EmptyLayer())

        elif x["type"] == "maxpool":
            stride = int(x["stride"])
            size = int(x["size"])
            if stride != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                # stride-1 maxpool needs special padding to keep the size.
                maxpool = MaxPoolStride1(size)
            module.add_module("maxpool_{}".format(index), maxpool)

        elif x["type"] == "yolo":
            # Detection layer: keep only the anchor pairs selected by "mask".
            mask = [int(m) for m in x["mask"].split(",")]
            anchors = [int(a) for a in x["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]
            module.add_module("Detection_{}".format(index), DetectionLayer(anchors))

        else:
            print("Something I dunno")
            assert False

        module_list.append(module)
        prev_filters = filters           # this layer's output feeds the next
        output_filters.append(filters)
        index += 1

    return (net_info, module_list)

Class Darknet

在类中,主要实现了forward()、load_weights()、save_weights()三个函数

forward()

在上文的create_modules函数中,route、shortcut等层仅用空层占位,它们具体的前传逻辑连同整体的前传流程,都在下面的forward()函数中实现。

    def forward(self, x, CUDA):
        """Run one forward pass through every layer parsed from the cfg.

        Args:
            x: input batch tensor -- presumably (batch, 3, H, W) with H/W
                matching the [net] height/width; confirm against the cfg.
            CUDA: flag forwarded to predict_transform -- presumably selects
                GPU tensors; confirm in predict_transform.

        Returns:
            Detection tensor concatenated (dim 1) from all yolo layers,
            or the initial empty list if no yolo layer produced output.
        """
        detections = []
        modules = self.blocks[1:]    # blocks[0] is the [net] section, not a layer
        outputs = {}   # cache each layer's output for route/shortcut lookups
        write = 0      # becomes 1 once the first detection tensor is stored
        for i in range(len(modules)):
            module_type = (modules[i]["type"])
            # Layers wrapped in nn.Sequential can simply be applied.
            if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":
                x = self.module_list[i](x)
                outputs[i] = x
            elif module_type == "route":                # concatenate one or two cached outputs along channels
                layers = modules[i]["layers"]
                layers = [int(a) for a in layers]
                if (layers[0]) > 0:
                    # Absolute layer index -> offset relative to this layer.
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    # Single-layer route just forwards that layer's output.
                    x = outputs[i + (layers[0])]
                else:
                    if (layers[1]) > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    x = torch.cat((map1, map2), 1)      # concat along the channel dim
                outputs[i] = x
            elif  module_type == "shortcut":            # residual: element-wise add of two cached outputs
                from_ = int(modules[i]["from"])
                x = outputs[i-1] + outputs[i+from_]
                outputs[i] = x
            elif module_type == 'yolo':                 # detection layer: decode via predict_transform and collect
                anchors = self.module_list[i][0].anchors
                #Get the input dimensions
                inp_dim = int (self.net_info["height"])
                #Get the number of classes
                num_classes = int (modules[i]["classes"])
                #Output the result
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                # predict_transform presumably signals "no output" with an
                # int sentinel -- confirm in its implementation.
                if type(x) == int:
                    continue
                if not write:                           # first detection: initialize the accumulator
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
                outputs[i] = outputs[i-1]               # yolo layer passes the previous feature map through
        try:
            return detections
        except:
            # NOTE(review): detections is always bound (initialized to []
            # above), so this except path looks unreachable.
            return 0

load_weights(),save_weights()暂不做详细展开

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页