mtcnn代码解析

去你个锤子

于 2021-01-26 16:44:57 发布

阅读量894

点赞数

分类专栏： Linux 文章标签： pytorch

本文链接：https://blog.csdn.net/weixin_44152895/article/details/113183765

版权

Linux 专栏收录该内容

31 篇文章 1 订阅

订阅专栏

transform.py

import os
import sys
sys.path.append(os.getcwd())
from wider_loader import WIDER
import cv2
import time

"""
 modify .mat to .txt 
"""

# Directory holding the original WIDER FACE training images.
path_to_image = './data_set/face_detection/WIDER_train/images'

# MATLAB (.mat) annotation file to convert.
file_to_label = './data_set/face_detection/wider_face_split/wider_face_split/wider_face_train.mat'

# Text file produced by this script.
target_file = './anno_store/anno_train.txt'

wider = WIDER(file_to_label, path_to_image)

# wider.next() is a method of the project's WIDER loader (not shown here).
# The loop below iterates over whatever it returns; each item exposes
# `image_name` and `bboxes`.
# (The original notes described the *built-in* next(iterator, default), which
# returns the next element of an iterator, or `default` once it is exhausted.)

line_count = 0
box_count = 0

print('start transforming....')
t = time.time()

# `with` closes the file on exit, like try/finally with an explicit f.close().
with open(target_file, 'w+') as f:
    # press ctrl-C to stop the process
    for data in wider.next():
        # One output line per image: the image name first, then every box value.
        fields = [str(data.image_name)]
        line_count += 1
        for box in data.bboxes:
            box_count += 1
            fields.extend(str(value) for value in box)

        # The newline is joined like any other field, so each line ends " \n".
        fields.append('\n')
        f.write(' '.join(fields))

st = time.time()-t
print('end transforming')

print('spend time:%d'%st)
print('total line(images):%d'%line_count)
print('total boxes(faces):%d'%box_count)

gen_Pnet_train_data.py

"""
    2018-10-20 15:50:20
    generate positive, negative, and part-face images of size 12*12 to feed into PNet
"""
import sys
import numpy as np
import cv2
import os
sys.path.append(os.getcwd())
import numpy as np
from mtcnn.data_preprocess.utils import IoU

# Prefix joined in front of every image path read from the annotation file.
prefix        = ''
# Annotation list: one line per image, holding the image path followed by the
# face-box values (four numbers per face).
anno_file     = "./anno_store/anno_train.txt"
# "./" is the current directory, "../" the parent, "/" the filesystem root.
# NOTE(review): im_dir is not referenced anywhere in the code shown here.
im_dir        = "./data_set/face_detection/WIDER_train/images"
pos_save_dir  = "./data_set/train/12/positive"  # face crops
part_save_dir = "./data_set/train/12/part"      # partial-face crops
neg_save_dir  = './data_set/train/12/negative'  # non-face crops

# makedirs (not mkdir) so that missing parent directories such as
# ./data_set/train/12 are created too; exist_ok makes re-runs harmless.
for save_dir in (pos_save_dir, part_save_dir, neg_save_dir):
    os.makedirs(save_dir, exist_ok=True)

# anno_file: store labels of the wider face training data.
# It is read *before* the label files are opened for writing, so a missing
# annotation file fails fast without truncating labels from an earlier run.
with open(anno_file, 'r') as f:
    # readlines() returns every line up to EOF as a list of strings.
    annotations = f.readlines()
num = len(annotations)  # number of lines == number of images
print("%d pics in total" % num)

# store labels of positive, negative, part images
f1 = open(os.path.join('./anno_store', 'pos_12.txt'),  'w')
f2 = open(os.path.join('./anno_store', 'neg_12.txt'),  'w')
f3 = open(os.path.join('./anno_store', 'part_12.txt'), 'w')

p_idx = 0 # positive
n_idx = 0 # negative
d_idx = 0 # dont care
idx = 0
box_idx = 0
# Walk every annotated image and write 12x12 negative, positive and part-face
# crops plus their label lines. The three label files are closed in `finally`
# so they are flushed even when an image fails part-way through.
try:
    for annotation in annotations:  # one line per image
        # Strip surrounding whitespace, then split on single spaces: field 0 is
        # the image path, the remaining fields are box values, four per face.
        annotation = annotation.strip().split(' ')
        im_path = os.path.join(prefix, annotation[0])
        print(im_path)
        bbox = list(map(float, annotation[1:]))
        boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)
        img = cv2.imread(im_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("skip unreadable image: %s" % im_path)
            continue
        idx += 1
        if idx % 100 == 0:
            print(idx, "images done")

        height, width, channel = img.shape

        # Exclusive upper bound for the side length of a random negative crop.
        max_neg_size = min(width, height) // 2
        if max_neg_size <= 12:
            # randint(12, max_neg_size) would have an empty range.
            print("skip too-small image: %s" % im_path)
            continue

        # Keep cropping random squares until this image has produced
        # 50 negative examples.
        neg_num = 0
        while neg_num < 50:
            # Side length in [12, max_neg_size); the network input is 12x12.
            size = np.random.randint(12, max_neg_size)
            # Top-left corner, chosen so the square stays inside the image.
            nx = np.random.randint(0, width - size)
            ny = np.random.randint(0, height - size)
            # Crop rectangle as corners: [x1, y1, x2, y2].
            crop_box = np.array([nx, ny, nx + size, ny + size])

            Iou = IoU(crop_box, boxes)

            if np.max(Iou) < 0.3:
                # IoU with every ground-truth box is below 0.3: a negative.
                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # The four values are unpacked as (x_left, y_top, width, height).
            # NOTE(review): the same rows are passed unchanged to IoU() next to
            # corner-format crop boxes -- confirm which layout IoU() and the
            # annotation file actually use.
            x1, y1, w, h = box
            x2 = x1 + w - 1
            y2 = y1 + h - 1

            # Ignore faces whose longer side is under 40 px (small ground-truth
            # boxes may be inaccurate), boxes starting outside the image, and
            # degenerate boxes that would give randint an empty range below.
            if max(w, h) < 40 or min(w, h) <= 0 or x1 < 0 or y1 < 0:
                continue

            # Five extra negatives near the face: crops that overlap the
            # ground truth but still have IoU < 0.3 (hard negatives).
            for _ in range(5):
                size = np.random.randint(12, max_neg_size)
                # delta_x / delta_y are offsets from (x1, y1); the max() keeps
                # x1 + delta_x and y1 + delta_y from going negative.
                delta_x = np.random.randint(max(-size, -x1), w)
                delta_y = np.random.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)

                # Drop crops whose bottom-right corner leaves the image.
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = IoU(crop_box, boxes)

                if np.max(Iou) < 0.3:
                    # IoU with every ground-truth box must be below 0.3.
                    cropped_im = img[ny1: ny1 + size, nx1: nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # Up to 20 positive / part-face candidates per face. A side under
            # 5 px makes the +-20% jitter range empty, so such faces get none.
            pos_trials = 20 if min(w, h) >= 5 else 0
            for _ in range(pos_trials):
                # Crop side in [0.8 * min(w, h), 1.25 * max(w, h)).
                size = np.random.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))

                # Offset of the crop centre relative to the face-box centre.
                delta_x = np.random.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = np.random.randint(int(-h * 0.2), int(h * 0.2))

                # New square around the jittered centre (x1 + w/2, y1 + h/2),
                # clamped at the top/left image border.
                nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size

                if nx2 > width or ny2 > height:  # out of bounds: discard
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])
                # Offsets between the face box and the crop, normalised by the
                # crop size; written to the label file as regression targets.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[int(ny1): int(ny2), int(nx1): int(nx2), :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)

                box_ = box.reshape(1, -1)
                overlap = IoU(crop_box, box_)  # computed once, used by both tests
                if overlap >= 0.65:
                    # IoU >= 0.65: positive sample, label 1.
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif overlap >= 0.4:
                    # 0.4 <= IoU < 0.65: part-face sample, label -1.
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
finally:
    f1.close()
    f2.close()
    f3.close()

train_P_net.py

import argparse
import sys
import os
sys.path.append(os.getcwd())
from mtcnn.core.imagedb import ImageDB
from mtcnn.train_net.train import train_pnet
import mtcnn.config as config

# Hard-coded training settings. They are passed to train_net() when this
# script is run directly; the argparse defaults in parse_args() are not used.
annotation_file = './anno_store/imglist_anno_12.txt'
model_store_path = './model_store'
end_epoch = 10
frequent = 200
lr = 0.01
batch_size = 512
use_cuda = False


def train_net(annotation_file, model_store_path,
                end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False):
    """Load the image database from ``annotation_file`` and train PNet on it.

    The database returned by ``ImageDB.load_imdb()`` is passed through
    ``ImageDB.append_flipped_images()`` and the result is handed to
    ``train_pnet``. All remaining arguments are forwarded unchanged to
    ``train_pnet`` (``lr`` is forwarded as ``base_lr``).
    """
    database = ImageDB(annotation_file)
    samples = database.append_flipped_images(database.load_imdb())
    train_pnet(
        model_store_path=model_store_path,
        end_epoch=end_epoch,
        imdb=samples,
        batch_size=batch_size,
        frequent=frequent,
        base_lr=lr,
        use_cuda=use_cuda,
    )

def str2bool(value):
    """Convert a command-line token such as ``"False"`` or ``"1"`` to a bool.

    ``type=bool`` is unsuitable for argparse because ``bool("False")`` is
    ``True`` (any non-empty string is truthy). Real booleans pass through
    unchanged; unrecognised text raises ``argparse.ArgumentTypeError`` so that
    argparse reports a normal usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def parse_args():
    """Parse the PNet training options from the command line.

    Defaults come from the project's ``config`` module. Returns the
    ``argparse.Namespace`` with attributes ``annotation_file``,
    ``model_store_path``, ``end_epoch``, ``frequent``, ``lr``, ``batch_size``,
    ``use_cuda`` and ``prefix_path``.
    """
    parser = argparse.ArgumentParser(description='Train PNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--anno_file', dest='annotation_file',
                        default=os.path.join(config.ANNO_STORE_DIR, config.PNET_TRAIN_IMGLIST_FILENAME),
                        help='training data annotation file', type=str)
    parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
                        default=config.MODEL_STORE_DIR, type=str)
    parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
                        default=config.END_EPOCH, type=int)
    parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
                        default=200, type=int)
    parser.add_argument('--lr', dest='lr', help='learning rate',
                        default=config.TRAIN_LR, type=float)
    parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
                        default=config.TRAIN_BATCH_SIZE, type=int)
    # str2bool instead of bool, so that "--gpu False" really disables CUDA.
    parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
                        default=config.USE_CUDA, type=str2bool)
    # dest was the empty string, which stored the value under an attribute
    # name that cannot be read with normal attribute access.
    parser.add_argument('--prefix_path', dest='prefix_path',
                        help='training data annotation images prefix root path', type=str)

    args = parser.parse_args()
    return args

if __name__ == '__main__':
    # Train with the hard-coded module-level settings. parse_args() is
    # available for command-line configuration but is not called here.
    print('train Pnet argument:')

    train_net(
        annotation_file,
        model_store_path,
        end_epoch=end_epoch,
        frequent=frequent,
        lr=lr,
        batch_size=batch_size,
        use_cuda=use_cuda,
    )

mtcnn_test.py

import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face




if __name__ == '__main__':
    # Smoke test: run the detector on one image and save the visualisation.

    # Only the PNet weights are supplied here.
    # NOTE(review): rnet/onet are presumably returned as None in that case --
    # confirm against create_mtcnn_net. To load all three stages, also pass
    # r_model_path= and o_model_path=.
    pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_epoch.pt", use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

    image_path = "./112.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for a missing or undecodable file; fail with
        # a clear message instead of an opaque error inside cvtColor.
        raise FileNotFoundError("cannot read image: %s" % image_path)

    # OpenCV loads images as BGR; make an RGB copy for visualisation.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Detection runs on the original BGR array.
    bboxs, landmarks = mtcnn_detector.detect_face(img)
    save_name = 'r_4.jpg'
    vis_face(img_rgb, bboxs, landmarks, save_name)

去你个锤子

关注

0
点赞
踩
8

收藏

觉得还不错? 一键收藏
1
评论
mtcnn代码解析

transform.py: import os; import sys; sys.path.append(os.getcwd()); from wider_loader import WIDER; import cv2; import time; """ modify .mat to .txt """ #wider face original images path: path_to_image = './data_set/face_detection/WIDER_train/images' #mat...
复制链接

扫一扫