# Dataset locations for the hand-gesture recognition validation split.
# NOTE(review): "JEPGimgs" looks like a typo of "JPEGimgs", but it may match
# the actual on-disk directory names — confirm before renaming.
_VAL_DATA_DIR = "/workspace/xyf/hand_gesture_recongize/data/Val_data"
json_path = _VAL_DATA_DIR + "/coco_gt_part.json"
train_img_root_path = _VAL_DATA_DIR + "/JEPGimgs_part"
val_img_root_path = _VAL_DATA_DIR + "/JEPGimgs"
import os
import cv2
import re,json,copy
import random
import shutil,pdb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import mediapipe as mp
import numpy as np
import colorsys
from argparse import ArgumentParser
import shutil
def dist(point1, point2):
    """Return the SQUARED Euclidean distance between two 2-D points.

    The square root is deliberately omitted: callers only compare
    distances against each other, and squaring preserves the ordering.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return dx * dx + dy * dy
def find_min_dist(center_xy, bbox_list):
    """Find the box in ``bbox_list`` whose center is closest to ``center_xy``.

    Parameters
    ----------
    center_xy : [x, y] query point (e.g. mean of hand-landmark coordinates).
    bbox_list : list of ``[[x1, y1, x2, y2], class_id]`` entries (xyxy boxes).

    Returns
    -------
    (min_idx, min_dist) : index of the nearest box and its SQUARED distance;
    ``(-1, inf)`` when ``bbox_list`` is empty.

    BUG FIX: the original initialised ``min_dist`` to 10000, so any query
    point more than ~100 px from every box center left ``min_idx`` at -1,
    and callers indexing ``bbox_list[min_idx]`` silently matched the LAST
    box. ``float('inf')`` makes the nearest-neighbour search unconditional.
    """
    min_dist = float('inf')
    min_idx = -1
    for idx, entry in enumerate(bbox_list):
        x1, y1, x2, y2 = entry[0]
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        # Squared Euclidean distance — same metric as dist(), inlined so
        # this function is self-contained.
        d = (center_xy[0] - cx) ** 2 + (center_xy[1] - cy) ** 2
        if d < min_dist:
            min_dist = d
            min_idx = idx
    return min_idx, min_dist
def save_small_pic(small_pic_path, big_pic_path, bbox):
    """Crop ``bbox`` (xyxy, ints) out of ``big_pic_path`` and save the patch.

    Parameters
    ----------
    small_pic_path : destination path for the cropped patch.
    big_pic_path   : source image path.
    bbox           : [x1, y1, x2, y2] pixel coordinates.

    Raises
    ------
    ValueError if the source image cannot be read or the crop is empty
    (the original called ``exit(1)`` with no message, killing the whole
    process and hiding which file/box was at fault).
    """
    img = cv2.imread(big_pic_path)
    if img is None:
        # cv2.imread returns None (no exception) on a missing/corrupt file.
        raise ValueError(f"could not read image: {big_pic_path}")
    sm_pic = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
    if sm_pic.shape[0] == 0 or sm_pic.shape[1] == 0:
        raise ValueError(f"empty crop {bbox} from {big_pic_path}")
    print(f"sm_pic.shape={sm_pic.shape}")
    cv2.imwrite(small_pic_path, sm_pic)
def sv_txt(img_path_root, json_path, small_path):
    """Export every COCO annotation as a cropped image plus a label list.

    For each annotation, crops its box out of the source image into
    ``small_path`` and appends ``<crop_path> <category_id>`` to a ``.txt``
    file next to ``json_path``.

    Parameters
    ----------
    img_path_root : directory holding the full-size images.
    json_path     : COCO ground-truth json file.
    small_path    : output directory for the cropped patches.

    Returns
    -------
    (img_id_dict, img_id_box_class_dict) :
        file_name -> image_id, and image_id -> list of [xyxy_bbox, category_id].
    """
    cocoGt = COCO(json_path)
    # BUG FIX: str.replace('json', 'txt') also corrupts any "json" substring
    # in a directory name; splitext only touches the extension.
    save_txt = os.path.splitext(json_path)[0] + '.txt'
    # BUG FIX: the crop directory was never created, so every cv2.imwrite
    # below failed silently when it did not already exist.
    os.makedirs(small_path, exist_ok=True)

    img_ids = cocoGt.getImgIds()
    imgs = cocoGt.loadImgs(img_ids)
    img_id_file_dict = {}  # image_id -> file_name
    img_id_dict = {}       # file_name -> image_id
    for img in imgs:
        img_id_file_dict[img['id']] = img['file_name']
        img_id_dict[img['file_name']] = img['id']

    img_id_box_class_dict = {}
    with open(save_txt, 'w') as f:
        for idx, ann in enumerate(cocoGt.dataset['annotations']):
            img_id = ann['image_id']
            # COCO bbox is xywh; convert to integer xyxy for cropping.
            bbox = [int(ann['bbox'][0]),
                    int(ann['bbox'][1]),
                    int(ann['bbox'][0] + ann['bbox'][2]),
                    int(ann['bbox'][1] + ann['bbox'][3])]
            category_id = ann['category_id']
            # idx suffix keeps crops unique when one image has several boxes.
            small_pic_path = os.path.join(
                small_path,
                img_id_file_dict[img_id].replace('.jpg', '_') + str(idx) + '.jpg')
            big_pic_path = os.path.join(img_path_root, img_id_file_dict[img_id])
            f.write(small_pic_path + ' ' + str(category_id) + '\n')
            save_small_pic(small_pic_path, big_pic_path, bbox)
            if img_id not in img_id_box_class_dict:
                img_id_box_class_dict[img_id] = []
            img_id_box_class_dict[img_id].append([bbox, category_id])
    return img_id_dict, img_id_box_class_dict
def get_ratio(bbox_landmark, bbox_xml):
    """Measure how far the ground-truth box extends past the landmark box.

    Both boxes are xywh. For each side, the result is the gt-box extent
    from the landmark-box center, normalised by the landmark half-size,
    minus 1 — so 0 means the gt edge coincides with the landmark edge and
    positive values mean the gt box is larger on that side.

    Parameters
    ----------
    bbox_landmark : [x, y, w, h] box around the detected hand landmarks.
    bbox_xml      : [x, y, w, h] ground-truth box.

    Returns
    -------
    (top_ratio, bottom_ratio, left_ratio, right_ratio)

    BUG FIX: removed a leftover ``pdb.set_trace()`` that halted every call.
    """
    landmark_xc = bbox_landmark[0] + bbox_landmark[2] * 0.5
    landmark_yc = bbox_landmark[1] + bbox_landmark[3] * 0.5
    landmark_w = bbox_landmark[2]
    landmark_h = bbox_landmark[3]
    bbox_xml_xmin = bbox_xml[0]
    bbox_xml_ymin = bbox_xml[1]
    bbox_xml_xmax = bbox_xml[0] + bbox_xml[2]
    bbox_xml_ymax = bbox_xml[1] + bbox_xml[3]
    right_ratio = (bbox_xml_xmax - landmark_xc) / (landmark_w / 2) - 1
    left_ratio = (landmark_xc - bbox_xml_xmin) / (landmark_w / 2) - 1
    top_ratio = (landmark_yc - bbox_xml_ymin) / (landmark_h / 2) - 1
    bottom_ratio = (bbox_xml_ymax - landmark_yc) / (landmark_h / 2) - 1
    return top_ratio, bottom_ratio, left_ratio, right_ratio
def get_box_ratio_json(json_path, img_root_path):
    """Estimate, per gesture class, how much the annotated ground-truth box
    exceeds the mediapipe hand-landmark box on each side.

    Runs mediapipe Hands over every image under ``img_root_path``, matches
    each detected hand to the nearest ground-truth box (from the COCO json),
    and averages the per-side extension ratios per class. Prints the result
    dict ``{class_id: [top, bottom, left, right]}``.

    Side effects: writes cropped patches plus a label txt via sv_txt()
    (crops go to ``./small``), and exits the process if an image on disk is
    missing from the json.

    Bug fixes vs. the original:
    - removed a leftover ``pdb.set_trace()`` that halted the loop;
    - the count-mismatch branch accumulated ``t``/``b``/``l``/``r``/
      ``cur_class_id`` that were undefined on the first image (NameError)
      or stale from a previous image (corrupted the averages) — now it
      just logs and skips;
    - the "image not in json" message printed the entire dict instead of
      the offending file name;
    - images with no annotations no longer raise KeyError.
    """
    Top, Bottom, Left, Right = {}, {}, {}, {}
    count = {}
    res = {}
    mp_hands = mp.solutions.hands
    with mp_hands.Hands(static_image_mode=True,
                        max_num_hands=5,
                        min_detection_confidence=0.5) as hands:
        img_id_dict, img_id_box_class_dict = sv_txt(img_root_path, json_path, './small')
        for img_name in os.listdir(img_root_path):
            img_path = os.path.join(img_root_path, img_name)
            if img_name not in img_id_dict:
                # BUG FIX: originally printed the whole dict, not the name.
                print(f"img_id_dict error, img_name={img_name}")
                exit(1)
            img_id = img_id_dict[img_name]
            if img_id not in img_id_box_class_dict:
                # Image has no ground-truth boxes; nothing to compare.
                continue
            bbox_list = img_id_box_class_dict[img_id]
            image = cv2.imread(img_path)
            # mediapipe expects RGB; cv2 loads BGR.
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            image_width, image_height = image.shape[1], image.shape[0]
            if not results.multi_hand_landmarks:
                continue
            if len(results.multi_hand_landmarks) != len(bbox_list):
                # Detection/annotation count mismatch: the pairing would be
                # ambiguous, so skip the image entirely.
                print(f"gt box num and keypoint box num is not equal:"
                      f"{len(results.multi_hand_landmarks)}-{len(bbox_list)}")
                continue
            for hand_landmarks in results.multi_hand_landmarks:
                xs, ys = [], []
                for landmark in hand_landmarks.landmark:
                    # Landmarks are normalised [0,1]; clamp to image bounds.
                    xs.append(min(int(landmark.x * image_width), image_width - 1))
                    ys.append(min(int(landmark.y * image_height), image_height - 1))
                minx, maxx = min(xs), max(xs)
                miny, maxy = min(ys), max(ys)
                keypoint_box_xywh = [minx, miny, maxx - minx, maxy - miny]
                # Match this hand to the gt box with the nearest center.
                mean_x = sum(xs) / len(xs)
                mean_y = sum(ys) / len(ys)
                min_idx, _ = find_min_dist([mean_x, mean_y], bbox_list)
                cur_class_id = bbox_list[min_idx][1]
                cur_bbox = bbox_list[min_idx][0]  # xyxy -> xywh below
                cur_bbox_xywh = [cur_bbox[0], cur_bbox[1],
                                 cur_bbox[2] - cur_bbox[0],
                                 cur_bbox[3] - cur_bbox[1]]
                t, b, l, r = get_ratio(keypoint_box_xywh, cur_bbox_xywh)
                Top[cur_class_id] = Top.get(cur_class_id, 0) + t
                Bottom[cur_class_id] = Bottom.get(cur_class_id, 0) + b
                Left[cur_class_id] = Left.get(cur_class_id, 0) + l
                Right[cur_class_id] = Right.get(cur_class_id, 0) + r
                count[cur_class_id] = count.get(cur_class_id, 0) + 1
    for key in Top:
        res[key] = [Top[key] / count[key], Bottom[key] / count[key],
                    Left[key] / count[key], Right[key] / count[key]]
    print(res)
if __name__ == "__main__":
    # Guarded entry point: lets the helpers above be imported by other
    # scripts without triggering the full mediapipe/COCO pipeline.
    get_box_ratio_json(json_path, train_img_root_path)
# NOTE(review): removed trailing artifacts from the web-page this script was
# copied from (a dangling "util." fragment and the blog post-date line);
# they were not part of the program and made the file a SyntaxError.