【无标题】

最新推荐文章于 2023-12-12 15:19:28 发布
miaomiao妙妙
最新推荐文章于 2023-12-12 15:19:28 发布
阅读量90
点赞数
文章标签：计算机视觉人工智能深度学习
本文链接：https://blog.csdn.net/weixin_42130810/article/details/121370426
版权
diff_ratio
# -*- coding: utf-8 -*-

import os
import cv2
import re,json,copy
import random
import shutil,pdb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import mediapipe as mp
import numpy as np
import colorsys
from argparse import ArgumentParser
from mmcls.apis import inference_model, init_model, show_result_pyplot
import shutil


def draw_box1(img, box_xyxy,class_name,color=(0,255,0),is_top=False):
    """Draw a labelled rectangle on ``img`` and return the annotated image.

    Args:
        img: image array handed to OpenCV; the rectangle is drawn on it in place.
        box_xyxy: box corners ``[x_min, y_min, x_max, y_max]``; values are cast
            to int32 before drawing.
        class_name: text rendered next to the box.
        color: colour tuple passed to OpenCV for both the box and the text.
        is_top: selects the text anchor. When true the text starts at
            ``(x_max, y_max + 10)``, otherwise at ``(x_min, y_max - 10)``.

    Returns:
        The image returned by ``cv2.putText``.
    """
    corners = np.array(box_xyxy, dtype=np.int32)
    x_min, y_min = corners[0], corners[1]
    x_max, y_max = corners[2], corners[3]

    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)

    # Only the anchor of the label differs between the two modes.
    text_origin = (x_max, y_max + 10) if is_top else (x_min, y_max - 10)
    return cv2.putText(img, class_name, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)

def get_ratio(bbox_landmark,bbox_xml): # xywh-(landmark-groundtruth)
    """Measure how far ``bbox_xml`` extends past ``bbox_landmark`` on each side.

    Both boxes are ``[x, y, w, h]``. Each ratio is the distance from the
    landmark box centre to the corresponding ``bbox_xml`` edge, divided by
    the landmark half-width (left/right) or half-height (top/bottom),
    minus 1. A value of 0 therefore means the two edges coincide, a positive
    value means the xml edge lies outside the landmark box, and a negative
    value means it lies inside.

    Returns:
        ``(top_ratio, bottom_ratio, left_ratio, right_ratio)``.

    Raises:
        ZeroDivisionError: if the landmark box has zero width or height.
    """
    lm_x, lm_y = bbox_landmark[0], bbox_landmark[1]
    lm_w, lm_h = bbox_landmark[2], bbox_landmark[3]
    center_x = lm_x + lm_w*0.5
    center_y = lm_y + lm_h*0.5

    # Ground-truth (xml) box converted from xywh to its four edges.
    gt_left = bbox_xml[0]
    gt_top = bbox_xml[1]
    gt_right = bbox_xml[0] + bbox_xml[2]
    gt_bottom = bbox_xml[1] + bbox_xml[3]

    half_w = lm_w/2
    half_h = lm_h/2
    ratio_top = (center_y - gt_top)/half_h - 1
    ratio_bottom = (gt_bottom - center_y)/half_h - 1
    ratio_left = (center_x - gt_left)/half_w - 1
    ratio_right = (gt_right - center_x)/half_w - 1
    return ratio_top, ratio_bottom, ratio_left, ratio_right

def comp_mean(dd):
    """Return the mean of ``dd`` after discarding one minimum and one maximum.

    With three or more values, the first occurrence of the smallest value and
    then the first occurrence of the largest remaining value are dropped
    before averaging. With only one or two values there is nothing left to
    average after trimming, so the plain mean is returned instead of failing.

    Args:
        dd: sequence of numbers; it is not modified.

    Returns:
        The (trimmed) arithmetic mean.

    Raises:
        ValueError: if ``dd`` is empty.
    """
    values = list(dd)  # work on a copy so the caller's data stays intact
    if not values:
        raise ValueError('comp_mean() requires at least one value')
    if len(values) >= 3:
        values.remove(min(values))
        values.remove(max(values))
    return sum(values)/len(values)


def cut_pic(image,bbox):
    """Crop ``image`` to ``bbox``, clamping the box to the image bounds.

    Args:
        image: array whose first two dimensions are (height, width); both
            colour (H, W, C) and single-channel (H, W) arrays are accepted.
        bbox: ``[x_min, y_min, x_max, y_max]``; any indexable sequence
            (list, tuple, ndarray) of ints or floats. It is not modified.

    Returns:
        The slice ``image[y_min:y_max, x_min:x_max]`` (not a copy). It is empty
        when the clamped box has no area.
    """
    image_h, image_w = image.shape[:2]

    def _clamp(coord, upper):
        # Keep the result a plain int: writing it back into a float array
        # would turn it into a float again and break slicing.
        return min(max(int(coord), 0), upper)

    x_min = _clamp(bbox[0], image_w)
    y_min = _clamp(bbox[1], image_h)
    x_max = _clamp(bbox[2], image_w)
    y_max = _clamp(bbox[3], image_h)
    return image[y_min:y_max, x_min:x_max]


def diff(landmark_json,xml_json):
    """Compute per-category margins between landmark boxes and xml boxes.

    Both arguments are COCO annotation files loaded with ``COCO``. For every
    category of the landmark file, each image of that category contributes
    one ``get_ratio`` measurement, taken from the first annotation of that
    image/category in each file. The measurements are then reduced per side
    with ``comp_mean``.

    Args:
        landmark_json: path to the COCO json holding the landmark boxes.
        xml_json: path to the COCO json holding the xml boxes; looked up
            with the same image and category ids as ``landmark_json``.

    Returns:
        ``(Top, Bottom, Left, Right)``: four dicts keyed by category id.
        Categories with no images map to 0 on every side.

    Raises:
        IndexError: if ``xml_json`` has no annotation for an image/category
            pair that exists in ``landmark_json``.
    """
    Top,Bottom,Left,Right = {},{},{},{}

    cocoGt_landmark = COCO(landmark_json)
    cocoGt_xml = COCO(xml_json)

    for cat_id in cocoGt_landmark.getCatIds():
        top,bottom,left,right = [],[],[],[]
        for img_id in cocoGt_landmark.getImgIds(catIds=cat_id):
            annIds_landmark = cocoGt_landmark.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_landmark = cocoGt_landmark.loadAnns(annIds_landmark)[0]['bbox']

            annIds_xml = cocoGt_xml.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_xml = cocoGt_xml.loadAnns(annIds_xml)[0]['bbox']

            t,b,l,r = get_ratio(bbox_landmark,bbox_xml)
            top.append(t)
            bottom.append(b)
            left.append(l)
            right.append(r)

        if not top:
            # No image carries this category: nothing to average.
            Top[cat_id]=0
            Bottom[cat_id]=0
            Left[cat_id]=0
            Right[cat_id]=0
        else:
            Top[cat_id]=comp_mean(top)
            Bottom[cat_id]=comp_mean(bottom)
            Left[cat_id]=comp_mean(left)
            Right[cat_id]=comp_mean(right)
    return Top,Bottom,Left,Right

def xyxy_to_xywh(box):
    """Convert ``[x_min, y_min, x_max, y_max]`` to ``[x_min, y_min, w, h]``.

    Returns a deep copy of ``box`` with the last two entries replaced by
    width and height; the input is left untouched.
    """
    x_min, y_min = box[0], box[1]
    converted = copy.deepcopy(box)
    converted[2] = box[2] - x_min
    converted[3] = box[3] - y_min
    return converted

def xywh_to_xyxy(box):
    """Convert ``[x_min, y_min, w, h]`` to ``[x_min, y_min, x_max, y_max]``.

    Returns a deep copy of ``box`` with the last two entries replaced by the
    far corner; the input is left untouched.
    """
    x_min, y_min = box[0], box[1]
    converted = copy.deepcopy(box)
    converted[2] = box[2] + x_min
    converted[3] = box[3] + y_min
    return converted

def xianfu(value, xianfu_v):
    """Clamp ``value`` into the range ``[xianfu_v[0], xianfu_v[1]]``.

    The lower bound is applied first and the upper bound second, so when the
    bounds are inverted the upper bound wins.
    """
    lower, upper = xianfu_v[0], xianfu_v[1]
    clamped = lower if value < lower else value
    return upper if clamped > upper else clamped

def bigger_box(box,t_ratio,b_ratio,l_ratio,r_ratio,img_w_h):
    """Grow an xyxy box by per-side ratios and clamp it to the image.

    Each side is moved outwards by ``int(size * ratio)``, where ``size`` is
    the box width for left/right and the box height for top/bottom (negative
    ratios move the side inwards). The result is clamped to
    ``[0, image_width]`` horizontally and ``[0, image_height]`` vertically.

    Args:
        box: ``[min_x, min_y, max_x, max_y]``.
        t_ratio, b_ratio, l_ratio, r_ratio: growth ratios for the top,
            bottom, left and right side.
        img_w_h: ``[image_width, image_height]``.

    Returns:
        The adjusted box as ``[min_x, min_y, max_x, max_y]``.

    NOTE(review): get_ratio() expresses its margins relative to the box
    half-width/half-height, while this function multiplies by the full
    width/height -- confirm that this scaling is intended.
    """
    left, top, right, bottom = box
    image_width, image_height = img_w_h
    box_w = right - left
    box_h = bottom - top

    x_limits = [0, image_width]
    y_limits = [0, image_height]
    grown_left = xianfu(left - int(box_w*l_ratio), x_limits)
    grown_right = xianfu(right + int(box_w*r_ratio), x_limits)
    grown_top = xianfu(top - int(box_h*t_ratio), y_limits)
    grown_bottom = xianfu(bottom + int(box_h*b_ratio), y_limits)
    return [grown_left, grown_top, grown_right, grown_bottom]

def bigger_json(
    landmark_json='/workspace/xyf/mhw/hand_data/coco_json_gt_filepath.json',
    xml_json='/workspace/xyf/mhw/hand_data/coco_xml_gt_filepath.json',
    val_json='/workspace/xyf/hand_gesture_recongize/data/Val_data/coco_gt_bak.json',
    image_dir='/workspace/xyf/hand_gesture_recongize/data/Val_data/JEPGimgs',
    vis_dir='/workspace/xyf/hand_gesture_recongize/data/diff_perclass12_xml_landmark/',
    gt_json_file='/workspace/xyf/hand_gesture_recongize/data/Val_data/coco_gt.json',
):
    """Write a COCO json whose boxes are enlarged by the per-class margins.

    The per-category margins come from ``diff(landmark_json, xml_json)``.
    Every annotation of ``val_json`` whose image exists in ``image_dir`` is
    copied, its bbox is grown with ``bigger_box`` using the margins of its
    category, and its area is recomputed. For each processed image a
    visualisation is written to ``vis_dir`` showing the enlarged boxes
    (green, labelled ``ratio_<category id>``) and the original boxes (red,
    labelled with the category id).

    Args:
        landmark_json: COCO json with landmark boxes (input of ``diff``).
        xml_json: COCO json with xml boxes (input of ``diff``).
        val_json: COCO json whose annotations are enlarged.
        image_dir: directory holding the images referenced by ``val_json``.
        vis_dir: directory receiving the visualisation images.
        gt_json_file: path of the COCO json that is written.

    All defaults are the paths this function used before they became
    parameters, so calling ``bigger_json()`` behaves as before.
    """
    Top,Bottom,Left,Right = diff(landmark_json,xml_json)

    classes = ["0-one", "1-two", "2-three", "3-four", "4-five", "5-six","6-seven", "7-eight", "8-nine", "9-fist", "10-ok", "11-rock","12-thumbUp", "13-diss", "14-heartSingle","15-stop"]
    categories = [
        {"id": class_id, "name": class_name, "supercategory": ""}
        for class_id, class_name in enumerate(classes)
    ]

    cocoGt = COCO(val_json)
    jdicts = []
    images = []
    seen_image_ids = set()  # an image may belong to several categories
    for key in Top:
        for img_id in cocoGt.getImgIds(catIds=key):
            img_info = cocoGt.loadImgs(img_id)[0]
            file = os.path.join(image_dir, img_info['file_name'])
            if not os.path.exists(file):
                continue

            # List every image once, even when it shows up under more than
            # one category.
            if img_id not in seen_image_ids:
                seen_image_ids.add(img_id)
                images.append(img_info)

            img_w_h = [img_info['width'],img_info['height']]
            annIds = cocoGt.getAnnIds(imgIds=img_id, catIds=key, iscrowd=None)
            ann_infos = cocoGt.loadAnns(annIds)

            # Read the image once so that all boxes of this image/category
            # end up in the same visualisation.
            img = cv2.imread(file)
            if img is None:
                print('Can not read image:', file)

            for ann_info in ann_infos:
                tmp_ann_info = copy.deepcopy(ann_info)
                tmp_ann_info['bbox']=xyxy_to_xywh(bigger_box(xywh_to_xyxy(tmp_ann_info['bbox']),Top[key],Bottom[key],Left[key],Right[key],img_w_h))
                tmp_ann_info['area']=tmp_ann_info['bbox'][2]*tmp_ann_info['bbox'][3]
                jdicts.append(tmp_ann_info)

                if img is not None:
                    img = draw_box1(img, xywh_to_xyxy(tmp_ann_info['bbox']),'ratio_'+str(key),color=(0,255,0),is_top=False)
                    img = draw_box1(img, xywh_to_xyxy(ann_info['bbox']),str(key),color=(0,0,255),is_top=False)

            if img is not None and ann_infos:
                cv2.imwrite(os.path.join(vis_dir, os.path.basename(file)),img)

    coco_json = {'images': images, 'annotations': jdicts, 'categories': categories}
    with open(gt_json_file, 'w') as out_file:
        json.dump(coco_json, out_file)
    print('Done!')

def dist(point1, point2):
    """Return the squared Euclidean distance between two 2-D points.

    No square root is taken, e.g. ``dist([0, 0], [1, 2])`` is 5.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return delta_x**2 + delta_y**2

def find_min_dist(center_xy, bbox_list, max_sq_dist=10000): # find_min_dist([0,0], [[[0,0,1,1],0],[[5,5,1,1],0], [[2,2,1,1],0]])
    """Find the box whose centre is closest to ``center_xy``.

    Args:
        center_xy: reference point ``[x, y]``.
        bbox_list: sequence of entries whose first element is a box
            ``[x1, y1, x2, y2]``; the remaining elements are ignored.
        max_sq_dist: exclusive upper limit on the *squared* distance. Only
            boxes strictly closer than this are considered. The default of
            10000 keeps the previously hard-coded limit.

    Returns:
        ``(index, squared_distance)`` of the closest box, or
        ``(-1, max_sq_dist)`` when no box is closer than the limit
        (including when ``bbox_list`` is empty).
    """
    min_dist = max_sq_dist
    min_idx = -1
    for idx, bbox in enumerate(bbox_list):
        x1,y1,x2,y2 = bbox[0]
        box_center = [(x1+x2)/2, (y1+y2)/2]
        dist_val = dist(center_xy, box_center)  # squared distance
        if dist_val < min_dist:
            min_dist = dist_val
            min_idx = idx
    return min_idx,min_dist

def main():
    """Run MediaPipe hand detection on a fixed image set and save landmark overlays.

    Parses the command line, loads the ground-truth COCO json given as
    ``train_json``, runs ``mp.solutions.hands.Hands`` on a hard-coded list of
    images and then computes per-category box margins like ``diff`` does.

    NOTE(review): this function looks unfinished. ``landmark_anno_image``,
    ``cocoGt`` and ``cocoGt_landmark`` are used below but never assigned in
    the visible code, so the corresponding statements would raise NameError
    unless these names are defined elsewhere -- confirm before running.
    """
    parser = ArgumentParser()
    parser.add_argument('img', help='Image file')
    # NOTE(review): 'train_json' is a required positional (no nargs='?'), so
    # the default below is never applied; the help text also says
    # 'Config file' although the value is used as the ground-truth COCO json.
    parser.add_argument('train_json', default='/workspace/xyf/mhw/hand_data/coco_xml_gt_filepath.json',help='Config file')
    # 'checkpoint' and '--device' are parsed but not read by the active code below.
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0', help='Device used for inference')
    args = parser.parse_args()

    xml_json = args.train_json   # ground-truth COCO json
    cocoGt_xml = COCO(xml_json)
    img_ids = cocoGt_xml.getImgIds()
    imgs = cocoGt_xml.loadImgs(img_ids)
    cat_ids = cocoGt_xml.getCatIds()
    cats = cocoGt_xml.loadCats(cat_ids)
    # Maps file name -> image id; only referenced by commented-out and
    # unreachable code further down.
    img_id_dict = {} #file_path:id
    for img in imgs:
        img_id = img['id']
        file_name = img['file_name']
        img_id_dict[file_name] = img_id

    # Per-category results, filled by the loop at the end of the function.
    Top,Bottom,Left,Right = {},{},{},{}

    # The directory listing computed here is overwritten by the hard-coded
    # `jepg` / `IMAGE_FILES` values assigned below.
    IMAGE_FILES = []
    train_filenames = args.img
    source_path = os.path.join(train_filenames,'data_shujutang_5000')
    jepg = os.path.join(source_path,'JEPGImages')
    IMAGE_FILES = os.listdir(jepg)
    # class_names = os.listdir(source_path)
    # for class_ in class_names:
    #     class_path = os.path.join(train_filenames,class_)
    #     for pic in os.listdir(class_path):
    #         pic_path = os.path.join(class_path,pic)
    #         if pic.endswith('.jpg'):
    #             IMAGE_FILES.append(pic_path)
    # source = args.img
    # for pic in os.listdir(source):
    #     if pic.endswith('.jpg'):
    #         IMAGE_FILES.append(os.path.join(source,pic))
    # build the model from a config file and a checkpoint file

    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    jdict = []
    one = 0  # counts images in which no hand landmarks were found

    # Hard-coded data set: every file in IMAGE_FILES is looked up inside each
    # sub-directory listed in name_path.
    jepg = '/workspace/xyf/mhw/hand_data/data_cmii'
    name_path = ['cy','cyy','dxy','hly','gyj','hj','jc']
    IMAGE_FILES = ['16-1.jpg','16-3.jpg','16-4.jpg','16-2.jpg']

    with mp_hands.Hands(static_image_mode=True,max_num_hands=5,min_detection_confidence=0.5) as hands:
        for idx, pic_name in enumerate(IMAGE_FILES):
            for file_name in name_path:
                file = os.path.join(jepg,file_name,pic_name)
                img_name = pic_name.split('.')[0]
                # NOTE(review): cv2.imread presumably returns None for a
                # missing/unreadable file, in which case `.shape` fails --
                # confirm and guard if needed.
                image = cv2.imread(file)
                image_height, image_width, _ = image.shape
                # The image is converted from BGR to RGB before being processed.
                results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                # ################################## rects ###############################################
                # bbox_anno_image = image.copy()
                # if results.hand_rects:
                #     for i,rect in enumerate(results.hand_rects):
                #         tmp_rect = [rect.x_center* image_width,rect.y_center* image_height,rect.width* image_width,rect.height* image_height]
                #         bbox_anno_image,bbox = draw_bbox1(bbox_anno_image,tmp_rect)
                #         crop_img = cut_pic(image,bbox) 
                # else:
                #     print('Can not detect hand from current picture')
                #     landmark_anno_image = image.copy()
                ################################## rects ###############################################
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        # print('hand_landmarks:', hand_landmarks) #return: positions of the 21 hand keypoints
                        # print(
                        #     f'Index finger tip coordinates: (',
                        #     f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
                        #     f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
                        # )
                        # NOTE(review): `landmark_anno_image` is only assigned
                        # in the commented-out block above, so this call would
                        # raise NameError as written.
                        mp_drawing.draw_landmarks(landmark_anno_image,hand_landmarks,mp_hands.HAND_CONNECTIONS)
                # if results.hand_rects is not None:
                #     for i,rect in enumerate(results.hand_rects):
                #         tmp_rect = [rect.x_center* image_width,rect.y_center* image_height,rect.width* image_width,rect.height* image_height]
                #         bbox_anno_image,bbox = draw_bbox(bbox_anno_image,tmp_rect)   # bbox:xyxy
                #         crop_img = cut_pic(image,bbox) 
                        # cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/data_shujutang_5000/crop_mediapipe/'+str(i)+'-'+pic_name,crop_img)
                        # cls_result = inference_model(model, crop_img)
                        # print(f"filename = {file} result={cls_result}")
                        # pdb.set_trace()
                        # jdict.append({'image_id': img_id_dict[img_name],\
                        #     'category_id': int(cls_result['pred_label']),\
                        #     'bbox': [round(x, 3) for x in xywh(tmp_rect)],\
                        #      'score': round(cls_result['pred_score'], 5)})
                        # jdict.append({'image_id': img_id_dict[img_name],\
                        #     'category_id': cocoGt.loadAnns(ids=[img_id_dict[img_name]])[0]['category_id'],\
                        #     'bbox': [round(x, 6) for x in xywh(tmp_rect)],\
                        #     'score': 1})
                    # One overlay image per (sub-directory, picture) pair.
                    cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/data_shujutang_5000/result_mediapipe/'+file_name+'-landmark_'+pic_name,landmark_anno_image)
                    # print('idx:'+str(idx)+'/4674')
                else:  # no hand landmarks were detected in this image
                    one +=1
                    continue
                    # NOTE(review): everything below in this branch is
                    # unreachable because of the `continue` above; it also
                    # refers to `cocoGt`, which is not defined in this function.
                    print('Can not detect hand from current picture')
                    index = img_id_dict[img_name]
                    img_info = cocoGt.loadImgs(ids=[index])[0]
                    jdict.append({'image_id': index,\
                            'category_id': cocoGt.loadAnns(ids=[index])[0]['category_id'],\
                            'bbox': [0,0,img_info['width'],img_info['height']],\
                            'score': 1})



    # Per-category margin statistics; this loop mirrors the body of diff().
    # NOTE(review): `cocoGt_landmark` is never assigned in this function, so
    # the loop would raise NameError on its first iteration as written.
    for cat in cats: #14
        cat_id = cat['id']
        cat_name = cat['name']
        c_imgs_id = cocoGt_landmark.getImgIds(catIds=cat_id)
        num = len(c_imgs_id)
        top,bottom,left,right = [],[],[],[]
        for img_id in c_imgs_id:
            # Only the first annotation of each image/category pair is used.
            annIds_landmark = cocoGt_landmark.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_landmark = cocoGt_landmark.loadAnns(annIds_landmark)[0]['bbox']

            annIds_xml = cocoGt_xml.getAnnIds(imgIds=img_id, catIds=cat_id, iscrowd=None)
            bbox_xml = cocoGt_xml.loadAnns(annIds_xml)[0]['bbox']

            t,b,l,r = get_ratio(bbox_landmark,bbox_xml)
            # if t<0 or b<0 or l<0 or r<0:
            #     file = cocoGt_landmark.loadImgs([img_id])[0]['file_name']
            #     img = cv2.imread(file)
            #     img = draw_box1(img, [bbox_landmark[0],bbox_landmark[1],bbox_landmark[0]+bbox_landmark[2],bbox_landmark[1]+bbox_landmark[3]],cat_name,color=(0,255,0),is_top=False)
            #     img = draw_box1(img, [bbox_xml[0],bbox_xml[1],bbox_xml[0]+bbox_xml[2],bbox_xml[1]+bbox_xml[3]],cat_name,color=(0,0,255),is_top=False)
            #     cv2.imwrite('/workspace/xyf/hand_gesture_recongize/data/diff_perclass12_xml_landmark/'+file.split('/')[-1],img)
            top.append(t)
            bottom.append(b)
            left.append(l)
            right.append(r)
        if num==0:
            # Category without images: report 0 on every side.
            Top[cat_id]=0
            Bottom[cat_id]=0
            Left[cat_id]=0
            Right[cat_id]=0
        else:
            Top[cat_id]=comp_mean(top)
            Bottom[cat_id]=comp_mean(bottom)
            Left[cat_id]=comp_mean(left)
            Right[cat_id]=comp_mean(right)
    return Top,Bottom,Left,Right
    
# NOTE(review): the script entry point is a no-op -- neither main() nor
# bigger_json() is called here, so running this file only defines the
# functions above.
if __name__ == '__main__':
    pass