# diff_ratio
# -*- coding: utf-8 -*-
import os
import cv2
import re,json,copy
import random
import shutil,pdb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import mediapipe as mp
import numpy as np
import colorsys
from argparse import ArgumentParser
from mmcls.apis import inference_model, init_model, show_result_pyplot
import shutil
def draw_box1(img, box_xyxy, class_name, color=(0, 255, 0), is_top=False):
    """Draw a bounding box and a text label on ``img``.

    Args:
        img: Image passed straight to ``cv2.rectangle`` / ``cv2.putText``.
        box_xyxy: Box as ``[x_min, y_min, x_max, y_max]``; values are cast
            to ``int32`` before drawing.
        class_name: Text to render next to the box.
        color: Colour tuple used for both the rectangle and the text.
        is_top: Selects the label anchor. When true the text is anchored at
            ``(x_max, y_max + 10)``, otherwise at ``(x_min, y_max - 10)``.

    Returns:
        The value returned by ``cv2.putText`` (the annotated image).
    """
    corners = np.array(box_xyxy, dtype=np.int32)
    top_left = (corners[0], corners[1])
    bottom_right = (corners[2], corners[3])
    cv2.rectangle(img, top_left, bottom_right, color, 2)

    # Pick the label anchor relative to the bottom edge of the box.
    if is_top:
        anchor = (corners[2], corners[3] + 10)
    else:
        anchor = (corners[0], corners[3] - 10)
    return cv2.putText(img, class_name, anchor, cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
def get_ratio(bbox_landmark, bbox_xml):  # xywh-(landmark-groundtruth)
    """Measure how far the ground-truth box extends past the landmark box.

    Both boxes are ``[x, y, w, h]``. For every side, the distance from the
    landmark-box centre to the ground-truth edge is divided by half of the
    landmark-box size and 1 is subtracted, so ``0`` means the two edges
    coincide and a positive value means the ground-truth box is larger on
    that side.

    Returns:
        ``(top_ratio, bottom_ratio, left_ratio, right_ratio)``.
    """
    lm_w = bbox_landmark[2]
    lm_h = bbox_landmark[3]
    center_x = bbox_landmark[0] + lm_w * 0.5
    center_y = bbox_landmark[1] + lm_h * 0.5
    half_w = lm_w / 2
    half_h = lm_h / 2

    # Ground-truth box edges in absolute coordinates.
    gt_left = bbox_xml[0]
    gt_top = bbox_xml[1]
    gt_right = bbox_xml[0] + bbox_xml[2]
    gt_bottom = bbox_xml[1] + bbox_xml[3]

    horizontal = ((center_x - gt_left) / half_w - 1, (gt_right - center_x) / half_w - 1)
    vertical = ((center_y - gt_top) / half_h - 1, (gt_bottom - center_y) / half_h - 1)
    return vertical[0], vertical[1], horizontal[0], horizontal[1]
def comp_mean(dd):
    """Return the mean of ``dd`` with one minimum and one maximum dropped.

    The input is never modified. Trimming is only applied when at least
    three samples are available; with one or two samples the plain mean is
    returned, and an empty input yields ``0`` (the value callers in this
    file already use for "no samples"). Previously inputs with fewer than
    three samples raised ``ValueError`` / ``ZeroDivisionError``.

    Args:
        dd: Iterable of numbers.

    Returns:
        The trimmed (or plain) mean, or ``0`` for an empty input.
    """
    values = list(dd)  # shallow copy is enough: the elements are numbers
    if not values:
        return 0
    if len(values) > 2:
        # Drop the first occurrence of the smallest value, then the first
        # occurrence of the largest remaining value.
        values.remove(min(values))
        values.remove(max(values))
    return sum(values) / len(values)
def cut_pic(image, bbox):
    """Crop ``image`` to ``bbox`` after clamping the box to the image.

    Args:
        image: Array indexed as ``[row, column, ...]``; both colour
            (H, W, C) and single-channel (H, W) images are accepted.
        bbox: ``[x_min, y_min, x_max, y_max]`` as a list, tuple or array.
            Values are truncated to ``int``; the input is not modified.

    Returns:
        The slice ``image[y_min:y_max, x_min:x_max]``, which is empty when
        the clamped box has no area.
    """
    image_h, image_w = image.shape[:2]
    x_min, y_min, x_max, y_max = (int(v) for v in bbox[:4])

    # Clamp every coordinate on both sides so a negative max cannot be
    # interpreted by slicing as an index counted from the end.
    x_min = min(max(x_min, 0), image_w)
    x_max = min(max(x_max, 0), image_w)
    y_min = min(max(y_min, 0), image_h)
    y_max = min(max(y_max, 0), image_h)
    return image[y_min:y_max, x_min:x_max]
def _ratio_summary(samples):
    """Summarise one side's ratios: 0 if empty, plain mean below 3 samples, else comp_mean."""
    if not samples:
        return 0
    if len(samples) < 3:
        # Too few samples to drop a min and a max.
        return sum(samples) / len(samples)
    return comp_mean(samples)


def diff(landmark_json, xml_json):
    """Compute per-category mean expansion ratios between two COCO files.

    For every category of ``landmark_json`` and every image carrying that
    category, the first landmark box is compared with the first box of the
    same image id / category id in ``xml_json`` via ``get_ratio``. The
    ratios are then averaged per category.

    NOTE(review): image ids and category ids are assumed to be shared
    between the two files — confirm against how the files are generated.

    Args:
        landmark_json: Path to the COCO annotation file with landmark boxes.
        xml_json: Path to the COCO annotation file with ground-truth boxes.

    Returns:
        ``(Top, Bottom, Left, Right)``: four dicts mapping category id to
        the averaged ratio for that side (``0`` when no pair was found).
    """
    Top, Bottom, Left, Right = {}, {}, {}, {}
    cocoGt_landmark = COCO(landmark_json)
    cocoGt_xml = COCO(xml_json)

    for cat in cocoGt_landmark.loadCats(cocoGt_landmark.getCatIds()):
        cat_id = cat['id']
        top, bottom, left, right = [], [], [], []
        for img_id in cocoGt_landmark.getImgIds(catIds=cat_id):
            annIds_landmark = cocoGt_landmark.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            annIds_xml = cocoGt_xml.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            if not annIds_landmark or not annIds_xml:
                # No counterpart in one of the files: nothing to compare,
                # and indexing [0] below would raise IndexError.
                continue
            bbox_landmark = cocoGt_landmark.loadAnns(annIds_landmark)[0]['bbox']
            bbox_xml = cocoGt_xml.loadAnns(annIds_xml)[0]['bbox']
            t, b, l, r = get_ratio(bbox_landmark, bbox_xml)
            top.append(t)
            bottom.append(b)
            left.append(l)
            right.append(r)

        Top[cat_id] = _ratio_summary(top)
        Bottom[cat_id] = _ratio_summary(bottom)
        Left[cat_id] = _ratio_summary(left)
        Right[cat_id] = _ratio_summary(right)
    return Top, Bottom, Left, Right
def xyxy_to_xywh(box):
    """Return a copy of ``box`` converted from ``[x1, y1, x2, y2]`` to ``[x, y, w, h]``.

    The input is left untouched; only positions 2 and 3 of the copy change.
    """
    converted = copy.deepcopy(box)
    converted[2] -= converted[0]  # width  = x2 - x1
    converted[3] -= converted[1]  # height = y2 - y1
    return converted
def xywh_to_xyxy(box):
    """Return a copy of ``box`` converted from ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``.

    The input is left untouched; only positions 2 and 3 of the copy change.
    """
    converted = copy.deepcopy(box)
    converted[2] += converted[0]  # x2 = x + w
    converted[3] += converted[1]  # y2 = y + h
    return converted
def xianfu(value, xianfu_v):
    """Clamp ``value`` to the range ``[xianfu_v[0], xianfu_v[1]]``.

    Args:
        value: Number to limit.
        xianfu_v: Sequence whose first two items are the lower and upper bound.

    Returns:
        ``value`` itself when it lies inside the range, otherwise the bound
        it crossed.
    """
    lower = xianfu_v[0]
    upper = xianfu_v[1]
    return min(max(value, lower), upper)
def bigger_box(box, t_ratio, b_ratio, l_ratio, r_ratio, img_w_h):
    """Grow an ``[x1, y1, x2, y2]`` box by per-side ratios, limited to the image.

    Each side is moved outwards by ``int(side_length * ratio)`` (box width
    for left/right, box height for top/bottom) and the result is clamped
    to ``[0, image_width]`` / ``[0, image_height]`` with ``xianfu``.

    Args:
        box: ``[min_x, min_y, max_x, max_y]``.
        t_ratio, b_ratio, l_ratio, r_ratio: Expansion ratios for the top,
            bottom, left and right sides.
        img_w_h: ``(image_width, image_height)``.

    Returns:
        The adjusted box as ``[min_x, min_y, max_x, max_y]``.
    """
    left, top, right, bottom = box
    image_width, image_height = img_w_h
    box_w = right - left
    box_h = bottom - top
    x_limits = [0, image_width]
    y_limits = [0, image_height]
    return [
        xianfu(left - int(box_w * l_ratio), x_limits),
        xianfu(top - int(box_h * t_ratio), y_limits),
        xianfu(right + int(box_w * r_ratio), x_limits),
        xianfu(bottom + int(box_h * b_ratio), y_limits),
    ]
def bigger_json(
    landmark_json='/workspace/xyf/mhw/hand_data/coco_json_gt_filepath.json',
    xml_json='/workspace/xyf/mhw/hand_data/coco_xml_gt_filepath.json',
    val_json='/workspace/xyf/hand_gesture_recongize/data/Val_data/coco_gt_bak.json',
    image_dir='/workspace/xyf/hand_gesture_recongize/data/Val_data/JEPGimgs',
    vis_dir='/workspace/xyf/hand_gesture_recongize/data/diff_perclass12_xml_landmark/',
    gt_json_file='/workspace/xyf/hand_gesture_recongize/data/Val_data/coco_gt.json',
):
    """Write a COCO file whose boxes are enlarged by the per-class ratios.

    The ratios come from ``diff(landmark_json, xml_json)``. Every annotation
    of ``val_json`` whose image exists in ``image_dir`` is copied, its box
    is enlarged with ``bigger_box`` and its area recomputed. A visualisation
    with the enlarged box (green) and the original box (red) is written to
    ``vis_dir``. Calling the function without arguments uses the same
    paths that were previously hard-coded.

    Args:
        landmark_json: COCO file with landmark-derived boxes (ratio source).
        xml_json: COCO file with ground-truth boxes (ratio source).
        val_json: COCO file whose boxes are to be enlarged.
        image_dir: Directory containing the images named in ``val_json``.
        vis_dir: Directory receiving the visualisation images.
        gt_json_file: Output path of the enlarged COCO file.
    """
    Top, Bottom, Left, Right = diff(landmark_json, xml_json)

    classes = ["0-one", "1-two", "2-three", "3-four", "4-five", "5-six", "6-seven", "7-eight", "8-nine", "9-fist", "10-ok", "11-rock", "12-thumbUp", "13-diss", "14-heartSingle", "15-stop"]
    categories = [{"id": idx, "name": name, "supercategory": ""} for idx, name in enumerate(classes)]

    cocoGt = COCO(val_json)
    images = []
    jdicts = []
    seen_image_ids = set()  # an image with several categories is listed once

    for key in Top:
        for img_id in cocoGt.getImgIds(catIds=key):
            img_info = cocoGt.loadImgs(img_id)[0]
            img_path = os.path.join(image_dir, img_info['file_name'])
            if not os.path.exists(img_path):
                continue
            if img_id not in seen_image_ids:
                seen_image_ids.add(img_id)
                images.append(img_info)

            img_w_h = [img_info['width'], img_info['height']]
            # Read once per image so every annotation ends up on the same
            # visualisation; cv2.imread returns None for unreadable files.
            canvas = cv2.imread(img_path)
            annIds = cocoGt.getAnnIds(imgIds=img_id, catIds=key, iscrowd=None)
            for ann_info in cocoGt.loadAnns(annIds):
                tmp_ann_info = copy.deepcopy(ann_info)
                enlarged = bigger_box(xywh_to_xyxy(tmp_ann_info['bbox']), Top[key], Bottom[key], Left[key], Right[key], img_w_h)
                tmp_ann_info['bbox'] = xyxy_to_xywh(enlarged)
                tmp_ann_info['area'] = tmp_ann_info['bbox'][2] * tmp_ann_info['bbox'][3]
                jdicts.append(tmp_ann_info)
                if canvas is not None:
                    canvas = draw_box1(canvas, xywh_to_xyxy(tmp_ann_info['bbox']), 'ratio_' + str(key), color=(0, 255, 0), is_top=False)
                    canvas = draw_box1(canvas, xywh_to_xyxy(ann_info['bbox']), str(key), color=(0, 0, 255), is_top=False)
            if canvas is not None:
                cv2.imwrite(os.path.join(vis_dir, os.path.basename(img_path)), canvas)

    coco_json = {'images': images, 'annotations': jdicts, 'categories': categories}
    with open(gt_json_file, 'w') as out_file:
        json.dump(coco_json, out_file)
    print('Done!')
def dist(point1, point2):
    """Return the squared Euclidean distance between two 2-D points.

    No square root is taken: ``dist([0, 0], [1, 2])`` is ``5``.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return delta_x ** 2 + delta_y ** 2
def find_min_dist(center_xy, bbox_list, max_dist=10000):
    """Find the box whose centre is closest to ``center_xy``.

    Example call:
    ``find_min_dist([0,0], [[[0,0,1,1],0],[[5,5,1,1],0], [[2,2,1,1],0]])``

    Args:
        center_xy: Reference point ``[x, y]``.
        bbox_list: Sequence of entries whose first item is a box
            ``[x1, y1, x2, y2]``; the box centre is the midpoint of the
            two corners.
        max_dist: Exclusive upper bound on the *squared* distance (``dist``
            does not take a square root). Only boxes strictly closer than
            this are considered. Defaults to the previously hard-coded
            ``10000``.

    Returns:
        ``(index, squared_distance)`` of the closest box, or
        ``(-1, max_dist)`` when ``bbox_list`` is empty or no box is closer
        than ``max_dist``.
    """
    min_dist = max_dist
    min_idx = -1
    for idx, bbox in enumerate(bbox_list):
        x1, y1, x2, y2 = bbox[0]
        candidate = dist(center_xy, [(x1 + x2) / 2, (y1 + y2) / 2])
        if candidate < min_dist:
            min_dist, min_idx = candidate, idx
    return min_idx, min_dist
def main():
    """Run MediaPipe Hands over a hard-coded image set and save landmark overlays.

    Parses the command line, loads the COCO file given as ``train_json``,
    then processes every ``<person>/<picture>`` combination from the
    hard-coded ``name_path`` / ``IMAGE_FILES`` lists and writes an image
    with the detected hand landmarks drawn on it.

    NOTE(review): as written this function cannot run to completion. It
    reads ``landmark_anno_image``, ``cocoGt`` and ``cocoGt_landmark``, none
    of which is assigned in this function or anywhere in the part of the
    file visible here. See the notes at the individual lines.

    Returns:
        ``(Top, Bottom, Left, Right)``: dicts keyed by category id, filled
        by the final per-category loop.
    """
    parser = ArgumentParser()
    parser.add_argument('img', help='Image file')
    # NOTE(review): 'train_json' is a positional argument without nargs='?',
    # so argparse treats it as required and the default is never used.
    parser.add_argument('train_json', default='/workspace/xyf/mhw/hand_data/coco_xml_gt_filepath.json',help='Config file')
    # NOTE(review): args.checkpoint and args.device are parsed but not read
    # anywhere below.
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0', help='Device used for inference')
    args = parser.parse_args()
    xml_json = args.train_json # ground-truth json
    cocoGt_xml = COCO(xml_json)
    img_ids = cocoGt_xml.getImgIds()
    imgs = cocoGt_xml.loadImgs(img_ids)
    cat_ids = cocoGt_xml.getCatIds()
    cats = cocoGt_xml.loadCats(cat_ids)
    img_id_dict = {} # file_path:id
    for img in imgs:
        img_id = img['id']
        file_name = img['file_name']
        img_id_dict[file_name] = img_id
    Top,Bottom,Left,Right = {},{},{},{}
    IMAGE_FILES = []
    train_filenames = args.img
    source_path = os.path.join(train_filenames,'data_shujutang_5000')
    jepg = os.path.join(source_path,'JEPGImages')
    # NOTE(review): this listing (and `jepg` above) is overwritten by the
    # hard-coded values assigned further down before it is ever used.
    IMAGE_FILES = os.listdir(jepg)
    # class_names = os.listdir(source_path)
    # for class_ in class_names:
    #     class_path = os.path.join(train_filenames,class_)
    #     for pic in os.listdir(class_path):
    #         pic_path = os.path.join(class_path,pic)
    #         if pic.endswith('.jpg'):
    #             IMAGE_FILES.append(pic_path)
    # source = args.img
    # for pic in os.listdir(source):
    #     if pic.endswith('.jpg'):
    #         IMAGE_FILES.append(os.path.join(source,pic))
    # build the model from a config file and a checkpoint file
    # NOTE(review): no model is actually built here; only the MediaPipe
    # helpers are bound to local names.
    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    jdict = []
    one = 0  # counts images for which no hand landmarks were returned
    # Hard-coded data set: <jepg>/<person folder>/<picture name>.
    jepg = '/workspace/xyf/mhw/hand_data/data_cmii'
    name_path = ['cy','cyy','dxy','hly','gyj','hj','jc']
    IMAGE_FILES = ['16-1.jpg','16-3.jpg','16-4.jpg','16-2.jpg']
    with mp_hands.Hands(static_image_mode=True,max_num_hands=5,min_detection_confidence=0.5) as hands:
        for idx, pic_name in enumerate(IMAGE_FILES):
            for file_name in name_path:
                file = os.path.join(jepg,file_name,pic_name)
                img_name = pic_name.split('.')[0]
                # NOTE(review): cv2.imread returns None for a missing or
                # unreadable file, in which case `.shape` below raises.
                image = cv2.imread(file)
                image_height, image_width, _ = image.shape
                # The image is converted from BGR to RGB before processing.
                results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                # ################################## rects ###############################################
                # bbox_anno_image = image.copy()
                # if results.hand_rects:
                #     for i,rect in enumerate(results.hand_rects):
                #         tmp_rect = [rect.x_center* image_width,rect.y_center* image_height,rect.width* image_width,rect.height* image_height]
                #         bbox_anno_image,bbox = draw_bbox1(bbox_anno_image,tmp_rect)
                #         crop_img = cut_pic(image,bbox)
                # else:
                #     print('Can not detect hand from current picture')
                # landmark_anno_image = image.copy()
                ################################## rects ###############################################
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        # print('hand_landmarks:', hand_landmarks) # return: positions of the 21 hand keypoints
                        # print(
                        #     f'Index finger tip coordinates: (',
                        #     f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
                        #     f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
                        # )
                        # NOTE(review): `landmark_anno_image` is never assigned
                        # (its only assignment is in the commented-out block
                        # above), so this call raises NameError.
                        mp_drawing.draw_landmarks(landmark_anno_image,hand_landmarks,mp_hands.HAND_CONNECTIONS)
                    # if results.hand_rects is not None:
                    #     for i,rect in enumerate(results.hand_rects):
                    #         tmp_rect = [rect.x_center* image_width,rect.y_center* image_height,rect.width* image_width,rect.height* image_height]
                    #         bbox_anno_image,bbox = draw_bbox(bbox_anno_image,tmp_rect) # bbox:xyxy
                    #         crop_img = cut_pic(image,bbox)
                    #         cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/data_shujutang_5000/crop_mediapipe/'+str(i)+'-'+pic_name,crop_img)
                    #         cls_result = inference_model(model, crop_img)
                    #         print(f"filename = {file} result={cls_result}")
                    #         pdb.set_trace()
                    #         jdict.append({'image_id': img_id_dict[img_name],\
                    #             'category_id': int(cls_result['pred_label']),\
                    #             'bbox': [round(x, 3) for x in xywh(tmp_rect)],\
                    #             'score': round(cls_result['pred_score'], 5)})
                    #         jdict.append({'image_id': img_id_dict[img_name],\
                    #             'category_id': cocoGt.loadAnns(ids=[img_id_dict[img_name]])[0]['category_id'],\
                    #             'bbox': [round(x, 6) for x in xywh(tmp_rect)],\
                    #             'score': 1})
                    # Output name: <person folder>-landmark_<picture name>.
                    cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/data_shujutang_5000/result_mediapipe/'+file_name+'-landmark_'+pic_name,landmark_anno_image)
                    # print('idx:'+str(idx)+'/4674')
                else: # bbox is None
                    one +=1
                    continue
                    # NOTE(review): everything below in this branch is
                    # unreachable because of the `continue` above. It also
                    # references `cocoGt`, which is not defined here.
                    print('Can not detect hand from current picture')
                    index = img_id_dict[img_name]
                    img_info = cocoGt.loadImgs(ids=[index])[0]
                    jdict.append({'image_id': index,\
                        'category_id': cocoGt.loadAnns(ids=[index])[0]['category_id'],\
                        'bbox': [0,0,img_info['width'],img_info['height']],\
                        'score': 1})
    # Per-category ratio statistics; this loop mirrors the one in diff().
    # NOTE(review): `cocoGt_landmark` is not defined in this function, so the
    # first iteration raises NameError — presumably a COCO object built from
    # the landmark annotations was intended; TODO confirm.
    for cat in cats: #14
        cat_id = cat['id']
        cat_name = cat['name']
        c_imgs_id = cocoGt_landmark.getImgIds(catIds=cat_id)
        num = len(c_imgs_id)
        top,bottom,left,right = [],[],[],[]
        for img_id in c_imgs_id:
            # Only the first annotation of each image/category pair is used.
            annIds_landmark = cocoGt_landmark.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_landmark = cocoGt_landmark.loadAnns(annIds_landmark)[0]['bbox']
            annIds_xml = cocoGt_xml.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_xml = cocoGt_xml.loadAnns(annIds_xml)[0]['bbox']
            t,b,l,r = get_ratio(bbox_landmark,bbox_xml)
            # if t<0 or b<0 or l<0 or r<0:
            #     file = cocoGt_landmark.loadImgs([img_id])[0]['file_name']
            #     img = cv2.imread(file)
            #     img = draw_box1(img, [bbox_landmark[0],bbox_landmark[1],bbox_landmark[0]+bbox_landmark[2],bbox_landmark[1]+bbox_landmark[3]],cat_name,color=(0,255,0),is_top=False)
            #     img = draw_box1(img, [bbox_xml[0],bbox_xml[1],bbox_xml[0]+bbox_xml[2],bbox_xml[1]+bbox_xml[3]],cat_name,color=(0,0,255),is_top=False)
            #     cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/diff_perclass12_xml_landmark/'+file.split('/')[-1],img)
            top.append(t)
            bottom.append(b)
            left.append(l)
            right.append(r)
        if num==0:
            # No image for this category: record 0 for every side.
            Top[cat_id]=0
            Bottom[cat_id]=0
            Left[cat_id]=0
            Right[cat_id]=0
        else:
            Top[cat_id]=comp_mean(top)
            Bottom[cat_id]=comp_mean(bottom)
            Left[cat_id]=comp_mean(left)
            Right[cat_id]=comp_mean(right)
    return Top,Bottom,Left,Right
# NOTE(review): the script entry point is currently a no-op — neither main()
# nor bigger_json() is called when this file is executed directly.
if __name__ == '__main__':
    pass