# Dataset locations for the hand-gesture recognition validation split.
# NOTE(review): "JEPGimgs" looks like a typo of "JPEGimgs", but it may match
# the actual on-disk directory names — confirm before renaming.
_VAL_DATA_DIR = "/workspace/xyf/hand_gesture_recongize/data/Val_data"
json_path = _VAL_DATA_DIR + "/coco_gt_part.json"
train_img_root_path = _VAL_DATA_DIR + "/JEPGimgs_part"
val_img_root_path = _VAL_DATA_DIR + "/JEPGimgs"
import os
import cv2
import re,json,copy
import random
import shutil,pdb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import mediapipe as mp
import numpy as np
import colorsys
from argparse import ArgumentParser
import shutil
def dist(point1, point2):
    """Return the SQUARED Euclidean distance between two 2-D points.

    The square root is deliberately omitted: callers only compare
    distances against each other, and squaring preserves the ordering.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return dx * dx + dy * dy
def find_min_dist(center_xy, bbox_list):
    """Find the box in ``bbox_list`` whose center is closest to ``center_xy``.

    Parameters
    ----------
    center_xy : [x, y] query point (e.g. mean of hand-landmark coordinates).
    bbox_list : list of ``[[x1, y1, x2, y2], class_id]`` entries (xyxy boxes).

    Returns
    -------
    (min_idx, min_dist) : index of the nearest box and its SQUARED distance;
    ``(-1, inf)`` when ``bbox_list`` is empty.

    BUG FIX: the original initialised ``min_dist`` to 10000, so any query
    point more than ~100 px from every box center left ``min_idx`` at -1,
    and callers indexing ``bbox_list[min_idx]`` silently matched the LAST
    box. ``float('inf')`` makes the nearest-neighbour search unconditional.
    """
    min_dist = float('inf')
    min_idx = -1
    for idx, entry in enumerate(bbox_list):
        x1, y1, x2, y2 = entry[0]
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        # Squared Euclidean distance — same metric as dist(), inlined so
        # this function is self-contained.
        d = (center_xy[0] - cx) ** 2 + (center_xy[1] - cy) ** 2
        if d < min_dist:
            min_dist = d
            min_idx = idx
    return min_idx, min_dist
def save_small_pic(small_pic_path, big_pic_path, bbox):
    """Crop ``bbox`` (xyxy, ints) out of ``big_pic_path`` and save the patch.

    Parameters
    ----------
    small_pic_path : destination path for the cropped patch.
    big_pic_path   : source image path.
    bbox           : [x1, y1, x2, y2] pixel coordinates.

    Raises
    ------
    ValueError if the source image cannot be read or the crop is empty
    (the original called ``exit(1)`` with no message, killing the whole
    process and hiding which file/box was at fault).
    """
    img = cv2.imread(big_pic_path)
    if img is None:
        # cv2.imread returns None (no exception) on a missing/corrupt file.
        raise ValueError(f"could not read image: {big_pic_path}")
    sm_pic = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
    if sm_pic.shape[0] == 0 or sm_pic.shape[1] == 0:
        raise ValueError(f"empty crop {bbox} from {big_pic_path}")
    print(f"sm_pic.shape={sm_pic.shape}")
    cv2.imwrite(small_pic_path, sm_pic)
def sv_txt(img_path_root, json_path, small_path):
    """Export every COCO annotation as a cropped image plus a label list.

    For each annotation, crops its box out of the source image into
    ``small_path`` and appends ``<crop_path> <category_id>`` to a ``.txt``
    file next to ``json_path``.

    Parameters
    ----------
    img_path_root : directory holding the full-size images.
    json_path     : COCO ground-truth json file.
    small_path    : output directory for the cropped patches.

    Returns
    -------
    (img_id_dict, img_id_box_class_dict) :
        file_name -> image_id, and image_id -> list of [xyxy_bbox, category_id].
    """
    cocoGt = COCO(json_path)
    # BUG FIX: str.replace('json', 'txt') also corrupts any "json" substring
    # in a directory name; splitext only touches the extension.
    save_txt = os.path.splitext(json_path)[0] + '.txt'
    # BUG FIX: the crop directory was never created, so every cv2.imwrite
    # below failed silently when it did not already exist.
    os.makedirs(small_path, exist_ok=True)

    img_ids = cocoGt.getImgIds()
    imgs = cocoGt.loadImgs(img_ids)
    img_id_file_dict = {}  # image_id -> file_name
    img_id_dict = {}       # file_name -> image_id
    for img in imgs:
        img_id_file_dict[img['id']] = img['file_name']
        img_id_dict[img['file_name']] = img['id']

    img_id_box_class_dict = {}
    with open(save_txt, 'w') as f:
        for idx, ann in enumerate(cocoGt.dataset['annotations']):
            img_id = ann['image_id']
            # COCO bbox is xywh; convert to integer xyxy for cropping.
            bbox = [int(ann['bbox'][0]),
                    int(ann['bbox'][1]),
                    int(ann['bbox'][0] + ann['bbox'][2]),
                    int(ann['bbox'][1] + ann['bbox'][3])]
            category_id = ann['category_id']
            # idx suffix keeps crops unique when one image has several boxes.
            small_pic_path = os.path.join(
                small_path,
                img_id_file_dict[img_id].replace('.jpg', '_') + str(idx) + '.jpg')
            big_pic_path = os.path.join(img_path_root, img_id_file_dict[img_id])
            f.write(small_pic_path + ' ' + str(category_id) + '\n')
            save_small_pic(small_pic_path, big_pic_path, bbox)
            if img_id not in img_id_box_class_dict:
                img_id_box_class_dict[img_id] = []
            img_id_box_class_dict[img_id].append([bbox, category_id])
    return img_id_dict, img_id_box_class_dict
def get_ratio(bbox_landmark, bbox_xml):
    """Measure how far the ground-truth box extends past the landmark box.

    Both boxes are xywh. For each side, the result is the gt-box extent
    from the landmark-box center, normalised by the landmark half-size,
    minus 1 — so 0 means the gt edge coincides with the landmark edge and
    positive values mean the gt box is larger on that side.

    Parameters
    ----------
    bbox_landmark : [x, y, w, h] box around the detected hand landmarks.
    bbox_xml      : [x, y, w, h] ground-truth box.

    Returns
    -------
    (top_ratio, bottom_ratio, left_ratio, right_ratio)

    BUG FIX: removed a leftover ``pdb.set_trace()`` that halted every call.
    """
    landmark_xc = bbox_landmark[0] + bbox_landmark[2] * 0.5
    landmark_yc = bbox_landmark[1] + bbox_landmark[3] * 0.5
    landmark_w = bbox_landmark[2]
    landmark_h = bbox_landmark[3]
    bbox_xml_xmin = bbox_xml[0]
    bbox_xml_ymin = bbox_xml[1]
    bbox_xml_xmax = bbox_xml[0] + bbox_xml[2]
    bbox_xml_ymax = bbox_xml[1] + bbox_xml[3]
    right_ratio = (bbox_xml_xmax - landmark_xc) / (landmark_w / 2) - 1
    left_ratio = (landmark_xc - bbox_xml_xmin) / (landmark_w / 2) - 1
    top_ratio = (landmark_yc - bbox_xml_ymin) / (landmark_h / 2) - 1
    bottom_ratio = (bbox_xml_ymax - landmark_yc) / (landmark_h / 2) - 1
    return top_ratio, bottom_ratio, left_ratio, right_ratio
def get_box_ratio_json(json_path, img_root_path):
    """Estimate, per gesture class, how much the annotated ground-truth box
    exceeds the mediapipe hand-landmark box on each side.

    Runs mediapipe Hands over every image under ``img_root_path``, matches
    each detected hand to the nearest ground-truth box (from the COCO json),
    and averages the per-side extension ratios per class. Prints the result
    dict ``{class_id: [top, bottom, left, right]}``.

    Side effects: writes cropped patches plus a label txt via sv_txt()
    (crops go to ``./small``), and exits the process if an image on disk is
    missing from the json.

    Bug fixes vs. the original:
    - removed a leftover ``pdb.set_trace()`` that halted the loop;
    - the count-mismatch branch accumulated ``t``/``b``/``l``/``r``/
      ``cur_class_id`` that were undefined on the first image (NameError)
      or stale from a previous image (corrupted the averages) — now it
      just logs and skips;
    - the "image not in json" message printed the entire dict instead of
      the offending file name;
    - images with no annotations no longer raise KeyError.
    """
    Top, Bottom, Left, Right = {}, {}, {}, {}
    count = {}
    res = {}
    mp_hands = mp.solutions.hands
    with mp_hands.Hands(static_image_mode=True,
                        max_num_hands=5,
                        min_detection_confidence=0.5) as hands:
        img_id_dict, img_id_box_class_dict = sv_txt(img_root_path, json_path, './small')
        for img_name in os.listdir(img_root_path):
            img_path = os.path.join(img_root_path, img_name)
            if img_name not in img_id_dict:
                # BUG FIX: originally printed the whole dict, not the name.
                print(f"img_id_dict error, img_name={img_name}")
                exit(1)
            img_id = img_id_dict[img_name]
            if img_id not in img_id_box_class_dict:
                # Image has no ground-truth boxes; nothing to compare.
                continue
            bbox_list = img_id_box_class_dict[img_id]
            image = cv2.imread(img_path)
            # mediapipe expects RGB; cv2 loads BGR.
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            image_width, image_height = image.shape[1], image.shape[0]
            if not results.multi_hand_landmarks:
                continue
            if len(results.multi_hand_landmarks) != len(bbox_list):
                # Detection/annotation count mismatch: the pairing would be
                # ambiguous, so skip the image entirely.
                print(f"gt box num and keypoint box num is not equal:"
                      f"{len(results.multi_hand_landmarks)}-{len(bbox_list)}")
                continue
            for hand_landmarks in results.multi_hand_landmarks:
                xs, ys = [], []
                for landmark in hand_landmarks.landmark:
                    # Landmarks are normalised [0,1]; clamp to image bounds.
                    xs.append(min(int(landmark.x * image_width), image_width - 1))
                    ys.append(min(int(landmark.y * image_height), image_height - 1))
                minx, maxx = min(xs), max(xs)
                miny, maxy = min(ys), max(ys)
                keypoint_box_xywh = [minx, miny, maxx - minx, maxy - miny]
                # Match this hand to the gt box with the nearest center.
                mean_x = sum(xs) / len(xs)
                mean_y = sum(ys) / len(ys)
                min_idx, _ = find_min_dist([mean_x, mean_y], bbox_list)
                cur_class_id = bbox_list[min_idx][1]
                cur_bbox = bbox_list[min_idx][0]  # xyxy -> xywh below
                cur_bbox_xywh = [cur_bbox[0], cur_bbox[1],
                                 cur_bbox[2] - cur_bbox[0],
                                 cur_bbox[3] - cur_bbox[1]]
                t, b, l, r = get_ratio(keypoint_box_xywh, cur_bbox_xywh)
                Top[cur_class_id] = Top.get(cur_class_id, 0) + t
                Bottom[cur_class_id] = Bottom.get(cur_class_id, 0) + b
                Left[cur_class_id] = Left.get(cur_class_id, 0) + l
                Right[cur_class_id] = Right.get(cur_class_id, 0) + r
                count[cur_class_id] = count.get(cur_class_id, 0) + 1
    for key in Top:
        res[key] = [Top[key] / count[key], Bottom[key] / count[key],
                    Left[key] / count[key], Right[key] / count[key]]
    print(res)
if __name__ == "__main__":
    # Guarded entry point: lets the helpers above be imported by other
    # scripts without triggering the full mediapipe/COCO pipeline.
    get_box_ratio_json(json_path, train_img_root_path)
# NOTE(review): removed trailing artifacts from the web-page this script was
# copied from (a dangling "util." fragment and the blog post-date line);
# they were not part of the program and made the file a SyntaxError.