MTCNN 中生成 positive、negative、part 样本的 Python 代码解读

最新推荐文章于 2024-08-29 02:10:18 发布
intjun
最新推荐文章于 2024-08-29 02:10:18 发布
阅读量2k
点赞数
分类专栏： MTCNN
MTCNN 专栏收录该内容
2 篇文章 0 订阅
订阅专栏
最近跑通了 MTCNN 的训练代码，本文对其中用于生成 positive、negative、part 三类样本的脚本 gen_48net_data2.py 进行逐行解读。
项目地址：https://github.com/dlunion/mtcnn
对应代码地址：https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py

   
   
     
     
      
      
     
     
     
     
      
      
       
       import sys
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       sys.path.append(
       
       ‘D:\\Anaconda2\\libs’)    
       
       # 在windows系统上，导入python库目录
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       import numpy 
       
       as np
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       import cv2
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       import os
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       import numpy.random 
       
       as npr
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       from utils 
       
       import IoU
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       # stdsize: side length in pixels that every cropped window is resized to;
       # positive, negative and part samples all use this size.
       stdsize = 48
       # Annotation file: each line is split on spaces into an image name,
       # bounding-box values and 10 trailing landmark values.
       anno_file = "E:/face_alignment/data/CelebA/Anno/mtcnn_train_label_2.txt"
       # Prefix prepended to every annotated image name to build its path.
       im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
       # Output directories for the three sample categories.
       pos_save_dir = str(stdsize) + "/positive"
       part_save_dir = str(stdsize) + "/part"
       neg_save_dir = str(stdsize) + '/negative'
       # Root output directory; the generated label .txt files are written here.
       save_dir = "./" + str(stdsize)
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       # 生成文件夹函数
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       def mkr(dr):
           """Create directory ``dr`` if nothing exists at that path yet.

           Missing parent directories are created as well, so nested output
           paths work on a fresh checkout. If the path already exists the call
           is a no-op.

           Args:
               dr: Path of the directory to create.

           Raises:
               OSError: If the directory could not be created and still does
                   not exist afterwards (e.g. permission denied).
           """
           if os.path.exists(dr):
               return
           try:
               os.makedirs(dr)
           except OSError:
               # Something else may have created the directory between the
               # check above and makedirs(); only re-raise real failures.
               if not os.path.isdir(dr):
                   raise
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       # Make sure every output directory exists before anything is written.
       mkr(save_dir)
       mkr(pos_save_dir)
       mkr(part_save_dir)
       mkr(neg_save_dir)

       # Label files produced by this script: one line per generated sample,
       # holding the sample path (without extension), the class label and, for
       # positive/part samples, the bbox offsets and normalized landmarks.
       f1 = open(os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt'), 'w')
       f2 = open(os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt'), 'w')
       f3 = open(os.path.join(save_dir, 'part_' + str(stdsize) + '.txt'), 'w')
       try:
           # Read the source annotations; each line describes one original image.
           with open(anno_file, 'r') as f:
               annotations = f.readlines()
           num = len(annotations)
           print("%d pics in total" % num)

           p_idx = 0  # positive samples written so far
           n_idx = 0  # negative samples written so far
           d_idx = 0  # part samples written so far
           idx = 0  # annotation lines processed so far
           box_idx = 0  # ground-truth boxes processed so far

           # For every annotated image, crop negative / positive / part windows
           # around the labelled boxes and write matching label lines.
           for annotation in annotations:
               annotation = annotation.strip()
               if not annotation:
                   # A blank line has no fields to parse; float('') would raise.
                   continue
               annotation = annotation.split(' ')
               im_path = annotation[0]  # first field: image name
               # Fields 2 .. (last - 10): bbox values, 4 per box.
               bbox = [float(v) for v in annotation[1:-10]]
               # Last 10 fields: landmark values, stored as (x, y) pairs.
               pts = [float(v) for v in annotation[-10:]]
               boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
               im_path = im_dir + im_path
               img = cv2.imread(im_path)
               idx += 1
               if idx % 100 == 0:
                   print("%d images done" % idx)
               if img is None:
                   # cv2.imread signals a missing/unreadable file by returning
                   # None rather than raising; skip instead of crashing below.
                   print("skip unreadable image: %s" % im_path)
                   continue

               height, width = img.shape[:2]

               # ---- negative samples: up to 100 per image -------------------
               # Window side is drawn from [40, min(width, height) // 2).
               # randint needs a non-empty range, so images that are too small
               # produce no negatives instead of raising ValueError.
               max_neg_size = min(width, height) // 2
               neg_num = 0
               while max_neg_size > 40 and neg_num < 100:
                   size = npr.randint(40, max_neg_size)
                   # Top-left corner chosen so the window stays inside the image.
                   nx = npr.randint(0, width - size)
                   ny = npr.randint(0, height - size)
                   crop_box = np.array([nx, ny, nx + size, ny + size])

                   # Overlap of the random window with every annotated box.
                   Iou = IoU(crop_box, boxes)
                   if np.max(Iou) < 0.3:
                       # Overlap with all boxes is below 0.3: keep as negative.
                       # Crop/resize only now, so rejected windows cost nothing.
                       cropped_im = img[ny:ny + size, nx:nx + size, :]
                       resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                               interpolation=cv2.INTER_LINEAR)
                       save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                       f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
                       cv2.imwrite(save_file, resized_im)
                       n_idx += 1
                       neg_num += 1

               # ---- positive / part samples: 50 candidates per box ----------
               for box in boxes:
                   # box is (x_left, y_top, x_right, y_bottom)
                   x1, y1, x2, y2 = box
                   w = x2 - x1 + 1
                   h = y2 - y1 + 1

                   # Ignore small faces (their ground-truth boxes may not be
                   # accurate) and boxes starting outside the image.
                   if max(w, h) < 12 or x1 < 0 or y1 < 0:
                       continue

                   box_ = box.reshape(1, -1)  # single-row form passed to IoU
                   # Largest centre shift per axis: 20% of the box extent.
                   max_dx = int(w * 0.2)
                   max_dy = int(h * 0.2)

                   for i in range(50):
                       # Window side between 0.8 * min(w, h) and
                       # ceil(1.25 * max(w, h)); randint wants integer bounds.
                       size = npr.randint(int(min(w, h) * 0.8),
                                          int(np.ceil(1.25 * max(w, h))))

                       # Random shift of the window centre relative to the box
                       # centre; an empty range (very narrow box) means no shift.
                       delta_x = npr.randint(-max_dx, max_dx) if max_dx > 0 else 0
                       delta_y = npr.randint(-max_dy, max_dy) if max_dy > 0 else 0

                       # Top-left / bottom-right corners of the shifted window.
                       nx1 = max(x1 + w / 2 + delta_x - size // 2, 0)
                       ny1 = max(y1 + h / 2 + delta_y - size // 2, 0)
                       nx2 = nx1 + size
                       ny2 = ny1 + size

                       # Drop windows that reach outside the image.
                       if nx2 > width or ny2 > height:
                           continue
                       crop_box = np.array([nx1, ny1, nx2, ny2])

                       # Evaluate the overlap once and reuse it for both
                       # thresholds (assumes IoU has no side effects).
                       iou = IoU(crop_box, box_)
                       if not iou >= 0.4:
                           # Neither positive nor part: nothing to write.
                           continue

                       # Bbox regression targets, from
                       # x1 = nx1 + float(size) * offset_x1 (same for the rest).
                       offset_x1 = (x1 - nx1) / float(size)
                       offset_y1 = (y1 - ny1) / float(size)
                       offset_x2 = (x2 - nx1) / float(size)
                       offset_y2 = (y2 - ny1) / float(size)

                       # Landmarks relative to the window's top-left corner,
                       # normalized by the window size. Work on a copy so the
                       # raw values stay available for the next candidate.
                       norm_pts = list(pts)
                       for k in range(len(norm_pts) // 2):
                           norm_pts[k * 2] = (norm_pts[k * 2] - nx1) / float(size)
                           norm_pts[k * 2 + 1] = (norm_pts[k * 2 + 1] - ny1) / float(size)

                       cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
                       resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                               interpolation=cv2.INTER_LINEAR)

                       # Shared tail of the label line: offsets then landmarks.
                       record = ' %f %f %f %f' % (offset_x1, offset_y1,
                                                  offset_x2, offset_y2)
                       record += ''.join(" %f" % v for v in norm_pts) + "\n"

                       if iou >= 0.65:
                           # IoU >= 0.65: positive sample, label 1.
                           save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                           f1.write(str(stdsize) + "/positive/%s" % p_idx + ' 1' + record)
                           cv2.imwrite(save_file, resized_im)
                           p_idx += 1
                       else:
                           # 0.4 <= IoU < 0.65: part sample, label -1.
                           save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                           f3.write(str(stdsize) + "/part/%s" % d_idx + ' -1' + record)
                           cv2.imwrite(save_file, resized_im)
                           d_idx += 1

                   box_idx += 1
                   print("%s images done, pos: %s part: %s neg: %s"
                         % (idx, p_idx, d_idx, n_idx))
       finally:
           # Close the label files even if processing stops with an error, so
           # everything written so far is flushed to disk.
           f1.close()
           f2.close()
           f3.close()