read - xml

最新推荐文章于 2022-05-17 15:48:46 发布

highoooo

最新推荐文章于 2022-05-17 15:48:46 发布

阅读量418

点赞数

分类专栏： python AI 文章标签： xml 深度学习 python

本文链接：https://blog.csdn.net/highoooo/article/details/119934388

版权

AI 同时被 2 个专栏收录

27 篇文章 0 订阅

订阅专栏

python

26 篇文章 0 订阅

订阅专栏

xml -> annotation
类别列表（act_class）需要提前设定好

import xml.etree.ElementTree as ET
import random
import glob

# Presumably every label name found in the raw annotations; unused in this block.
all_class = ['straw hat' , 'train' , 'person' , 'hat' , 'head' , 'cellphone' , 'cell phone']

# Classes written to classes.txt; a box's class id is its index in this list.
act_class = ['hat' , 'head' , 'cellphone']


def Read_Write_classestxt_trainanno_(xml_root, img_root):
    """Convert annotation XML files into text annotation lines and write ``classes.txt``.

    Each XML file under ``xml_root`` that has at least one usable box yields one
    line of the form ``<img_root>/<filename> xmin,ymin,xmax,ymax,class_id ...``
    where ``class_id`` is the index of the object's ``<name>`` in ``act_class``.
    Every box entry, including the last one, is followed by a single space.

    Skipped silently: files without a ``<filename>``, objects without a
    ``<name>``, boxes missing a coordinate, and files left with no boxes.

    Args:
        xml_root: Directory whose entries are all parsed as XML.
        img_root: Prefix joined to each ``<filename>`` with ``/``.

    Returns:
        List of annotation lines (without newline), ordered by XML path.

    Raises:
        ValueError: If an object's name is not in ``act_class``.
        xml.etree.ElementTree.ParseError: If an entry is not well-formed XML.

    Side effects:
        Writes ``classes.txt`` in the current working directory, one class
        name per line.
    """
    coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotation_lines = []
    # Sorted so the output order is reproducible; glob order is arbitrary.
    for xml_name in sorted(glob.glob(f'{xml_root}/*')):
        tree = ET.parse(xml_name)
        img_file_path = tree.findtext('./filename')
        if img_file_path is None:
            continue

        # Rebuilt for every file so a file without boxes can never reuse the
        # entries (or the image name) of the file processed before it.
        boxes = []
        for obj in tree.iter('object'):
            gt = obj.findtext('name')
            if gt is None:
                continue
            if gt not in act_class:
                raise ValueError(f'{xml_name}: class {gt!r} is not in act_class')
            class_id = str(act_class.index(gt))
            for box in obj.iter('bndbox'):
                coords = [box.findtext(tag) for tag in coord_tags]
                # A missing or empty coordinate would otherwise end up in the
                # output as the text "None" or as an empty field.
                if not all(coords):
                    continue
                boxes.append(','.join(coords + [class_id]))

        if not boxes:
            continue
        # Keep the historical layout: one space after every box entry.
        box_text = ''.join(f'{entry} ' for entry in boxes)
        annotation_lines.append(f'{img_root}/{img_file_path} {box_text}')

    with open("classes.txt", 'w') as f:
        for a in act_class:
            f.write(a + '\n')
    return annotation_lines


if __name__ == "__main__":
    # Label folders and image folders are paired by position.
    xml_roots = ['data/label']
    img_roots = ['data/img']

    # One list of annotation lines per (label folder, image folder) pair.
    base_anno_lines = []
    for label_dir, image_dir in zip(xml_roots, img_roots):
        folder_lines = Read_Write_classestxt_trainanno_(label_dir, image_dir)
        print(len(folder_lines))
        base_anno_lines.append(folder_lines)

    # The output name could be passed in from a global variable instead.
    with open("train_anno.txt", 'w') as out:
        out.writelines(
            line + '\n'
            for folder_lines in base_anno_lines
            for line in folder_lines
        )

对于xml的一些过滤

def filter4(xml_root, img_root):
    """Normalise label names in annotation XML files and export the kept samples.

    For every entry matched by ``../<xml_root>/*`` the direct children of the
    XML root are inspected:

    * a child holding a ``<name>`` equal to ``train`` is removed from the tree;
    * ``straw hat`` becomes ``hat``, ``person`` becomes ``head`` and
      ``cell phone`` becomes ``cellphone``.

    If at least one ``<name>`` is left, the image ``../<img_root>/<name>.png``
    is re-encoded to ``data/4/img/<name>.jpg`` and the edited tree is written
    to ``data/4/label/<name>.xml``. The output directories are not created
    here. Files left without any ``<name>`` are skipped and their image is
    never opened.

    Args:
        xml_root: Annotation directory, resolved relative to ``..``.
        img_root: Image directory, resolved relative to ``..``.

    Returns:
        Always ``0``. The number of samples that could not be exported is
        printed, not returned.
    """
    renames = {'straw hat': 'hat', 'person': 'head', 'cell phone': 'cellphone'}
    error_num = 0
    for xml_name in glob.glob(f'../{xml_root}/*'):
        tree = ET.parse(xml_name)
        print(xml_name)
        root = tree.getroot()
        img_name = tree.findtext('./filename').split(".png")[0]
        img_path = "../" + img_root + '/' + img_name + ".png"

        kept_names = 0
        # Iterate over a copy: children are removed from root inside the loop.
        for a in list(root):
            names = [b for b in a if b.tag == 'name']
            if any(b.text == 'train' for b in names):
                # Remove the element instead of clearing it, so no empty
                # stub element is left behind in the written XML.
                root.remove(a)
                continue
            for b in names:
                b.text = renames.get(b.text, b.text)
            kept_names += len(names)

        if kept_names == 0:
            continue

        try:
            # np.fromfile + imdecode rather than cv2.imread, presumably to
            # cope with non-ASCII paths -- TODO confirm.
            img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
            if img is None:
                raise ValueError(f'cannot decode {img_path}')
            name = f"data/4/img/{img_name}.jpg"
            ok, encoded = cv2.imencode('.jpg', img)
            if not ok:
                raise ValueError(f'cannot encode {name}')
            encoded.tofile(name)
            tree.write(f'data/4/label/{img_name}.xml', encoding='UTF-8')
        except Exception:
            # Best effort: report the sample and carry on with the next file.
            print(img_root + '/' + img_name + ".jpg")
            error_num += 1

    print(error_num)
    return 0

对于annotation的抽样测试（不在dataloader中）

import random
import glob
import xml.etree.ElementTree as ET
import cv2
import os
import numpy as np


# Class names indexed by the class id stored as the fifth field of each box.
classes = ['hat','head','cellphone','person']


def random_test(train_txt_path, random_num):
    """Draw the boxes of randomly picked annotation lines onto their images.

    Each non-blank line of ``train_txt_path`` is split on whitespace into an
    image path followed by ``xmin,ymin,xmax,ymax,class_id`` entries. The lines
    are shuffled and the first ``random_num`` are rendered: every box is drawn
    as a rectangle with its class name from ``classes``, and the result is
    saved as ``random_test/<i>.jpg``. The ``random_test`` directory is not
    created here.

    Args:
        train_txt_path: Path of the annotation text file.
        random_num: Maximum number of samples to render.

    Returns:
        None. The image path of every sample that fails is printed.
    """
    with open(train_txt_path, 'r') as f:
        lines = f.readlines()
    random.shuffle(lines)
    # Blank lines have no image path; indexing them used to raise inside the
    # try block and then fail again in the handler on the unset img_path.
    img_coors = [fields for fields in (line.split() for line in lines) if fields]
    for i, img_coor in enumerate(img_coors[:random_num]):
        # Bound before the try so the handler always reports the current sample.
        img_path = img_coor[0]
        try:
            img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)

            for co in img_coor[1:]:
                co = [int(x) for x in co.split(',')]
                coor = [(co[0], co[1]), (co[2], co[3])]
                gt = classes[co[4]]
                cv2.rectangle(img, coor[0], coor[1], (255, 255, 0), 2, 2)
                cv2.putText(img, gt, coor[0], cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            name = f"random_test/{i}.jpg"
            cv2.imencode('.jpg', img)[1].tofile(name)
        except Exception:
            # Best effort: report the sample and continue with the next one.
            print(img_path)




if __name__ == '__main__':
    # exist_ok tolerates only an already existing directory; other failures
    # (e.g. missing permissions) now surface here instead of being swallowed
    # and showing up later as one failed sample after another.
    os.makedirs("random_test", exist_ok=True)

    train_txt_path = 'train_anno.txt'
    random_num = 59

    random_test(train_txt_path, random_num)

highoooo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
read - xml

xml -> annotation：class 提前设定好。import xml.etree.ElementTree as ET; import random; import glob; all_class = ['straw hat' , 'train' , 'person' , 'hat' , 'head' , 'cellphone' , 'cell phone']; act_class = ['hat' , 'head' , 'cellphone']; def Read_Write_classest…
复制链接

扫一扫