目标检测Faster RCNN 批量标记数据集,生成xml,csv以及record文件
0.将png图片转换为jpg
# Step 0: convert every .png in dirname_read into an RGB .jpg in dirname_write.
import os
from PIL import Image

# dirname_read = "path/to/png/folder/"   # note the trailing slash
# dirname_write = "path/to/jpg/output/"
dirname_read="G:\TensorflowDectionAPI2\Azxh_woxuan\image/" # note the trailing slash
dirname_write="G:\TensorflowDectionAPI2\Azxh_woxuan\image_jpg/"

names = os.listdir(dirname_read)
count = 0
for name in names:
    parts = name.split(".")
    # Check the extension BEFORE opening, so non-PNG files are never touched
    # (the original opened every file first, wasting work and crashing on
    # non-image files).
    if parts[-1] != "png":
        continue
    img = Image.open(dirname_read + name)
    # convert("RGB") handles RGBA, greyscale and palette PNGs alike;
    # the original r,g,b,a = img.split() crashed on anything that was
    # not exactly 4-channel.
    img = img.convert("RGB")
    parts[-1] = "jpg"
    out_name = ".".join(parts)
    to_save_path = dirname_write + out_name
    img.save(to_save_path)
    count += 1
    # "count" was misspelled "conut" in the original progress message.
    print(to_save_path, "------count:", count)
1.打乱图片顺序并重新命名
# coding=gbk
import os
import random


class ImageRename():
    """Shuffle the files in a folder and rename the .jpg ones with a
    zero-padded sequence prefix ('00000-<old name>', '00001-<old name>', ...),
    so that training images end up in random order."""

    def __init__(self, path='G:\TensorflowDectionAPI2\images-NV\shuffer'):
        # Folder containing the images. The original hard-coded this path;
        # it is now a parameter whose default preserves the old behavior.
        self.path = path

    def rename(self):
        """Rename every .jpg under self.path in a random order.

        Non-jpg files are listed but left untouched; prints a summary of how
        many files were seen vs. renamed."""
        filelist = os.listdir(self.path)
        random.shuffle(filelist)
        total_num = len(filelist)
        i = 0
        for item in filelist:
            print(item)
            if item.endswith('.jpg'):
                src = os.path.join(os.path.abspath(self.path), item)
                # New name: '00' + 3-digit zero-padded index + '-' + old name.
                dst = os.path.join(os.path.abspath(self.path), '00' + format(str(i), '0>3s') + '-' + item)
                os.rename(src, dst)
                print('converting %s to %s ...' % (src, dst))
                i = i + 1
        print('total %d to rename & converted %d jpgs' % (total_num, i))


if __name__ == '__main__':
    newname = ImageRename()
    newname.rename()
2.批量生成xml文件
我自己的图片命名如下:
#! /usr/bin/python
# -*- coding:UTF-8 -*-
# Step 2: generate one Pascal-VOC style .xml annotation per .jpg, where the
# object class label is derived from the two angles encoded in the file name
# (expected shape '<index>-<th>-<phi>', e.g. '00005-70-130.jpg').
import os, sys
import glob
from PIL import Image

# VEDAI image folder
src_img_dir = "G:\TensorflowDectionAPI2\images-NV\shuffer" #modification
# Output folder for the generated xml files
src_xml_dir = "G:\TensorflowDectionAPI2\images-NV\shuffer_auto"

# Fixed bounding box used for every image (the pattern always sits here).
_XMIN, _YMIN, _XMAX, _YMAX = 150, 67, 510, 410


def _angle_index(th, phi):
    """Map integer angles (th, phi) in degrees to the class label string.

    Returns None when th falls outside the two supported bands (0-30, 61-90)
    or phi is outside 0-180.  The original if/elif chain left `index`
    undefined in that case (NameError on the first image, a stale label on
    later ones) — notably th == 60, which the data generator can produce.
    """
    if 0 <= th <= 30:
        th_tag = "000-030"
    elif 61 <= th <= 90:
        th_tag = "061-090"
    else:
        return None
    for lo, hi, ph_tag in ((0, 30, "000-030"), (31, 60, "031-060"),
                           (61, 90, "061-090"), (91, 120, "091-120"),
                           (121, 150, "121-150"), (151, 180, "151-180")):
        if lo <= phi <= hi:
            label = th_tag + "_" + ph_tag
            # Preserve the historical (typo'd) label so it keeps matching
            # class_text_to_int in the tfrecord generator — do not "fix"
            # one side without the other.
            if label == "061-090_121-150":
                label = "061-090_0121-150"
            return label
    return None


img_Lists = glob.glob(src_img_dir + '/*.jpg')
img_basenames = [os.path.basename(item) for item in img_Lists]  # e.g. 100.jpg
img_names = [os.path.splitext(item)[0] for item in img_basenames]  # e.g. 100

for img in img_names:
    # Derive the class label from the angles embedded in the file name.
    filename_split = img.split('-')
    th = int(filename_split[1])
    phi = int(filename_split[2])
    index = _angle_index(th, phi)
    if index is None:
        print('skip %s: angles (%d, %d) outside supported ranges' % (img, th, phi))
        continue
    im = Image.open((src_img_dir + '/' + img + '.jpg'))
    width, height = im.size
    # `with` guarantees the xml file is flushed and closed
    # (the original never closed its handles).
    with open((src_xml_dir + '/' + img + '.xml'), 'w') as xml_file:
        xml_file.write('<annotation>\n')
        xml_file.write('    <folder>shuffer_auto</folder>\n')
        xml_file.write('    <filename>' + str(img) + '.jpg' + '</filename>\n')
        xml_file.write('    <path>' + src_xml_dir + '/' + str(img) + '.jpg' + '</path>\n')
        xml_file.write('    <source>\n')
        xml_file.write('        <database>' + "Unknow" + '</database>\n')
        xml_file.write('    </source>\n')
        xml_file.write('    <size>\n')
        xml_file.write('        <width>' + str(width) + '</width>\n')
        xml_file.write('        <height>' + str(height) + '</height>\n')
        xml_file.write('        <depth>1</depth>\n')
        xml_file.write('    </size>\n')
        xml_file.write('    <segmented>0</segmented>\n')
        xml_file.write('    <object>\n')
        xml_file.write('        <name>' + index + '</name>\n')  # class label
        xml_file.write('        <pose>Unspecified</pose>\n')
        xml_file.write('        <truncated>0</truncated>\n')
        xml_file.write('        <difficult>0</difficult>\n')
        xml_file.write('        <bndbox>\n')
        xml_file.write('            <xmin>' + str(_XMIN) + '</xmin>\n')
        xml_file.write('            <ymin>' + str(_YMIN) + '</ymin>\n')
        xml_file.write('            <xmax>' + str(_XMAX) + '</xmax>\n')
        xml_file.write('            <ymax>' + str(_YMAX) + '</ymax>\n')
        xml_file.write('        </bndbox>\n')
        xml_file.write('    </object>\n')
        xml_file.write('</annotation>')
写出来的效果图
对于xml文档,我的理解是:<size>标签是关于图片本身的,<object>标签是关于标注(label)的,depth是关于颜色的通道数
参考链接:目标检测xml文档理解
3.从xml文件生成csv文件
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
# Run this once for the train folder and once for the test folder — each run
# produces one csv containing the info from every xml in that folder.
os.chdir('G:\TensorflowDectionAPI2\images-NV\\test') # folder holding the xml files
path = 'G:\TensorflowDectionAPI2\images-NV\\test'
def xml_to_csv(path):
    """Parse every Pascal-VOC .xml under `path` into a pandas DataFrame.

    One row per <object> element, with columns
    [filename, width, height, class, xmin, ymin, xmax, ymax].

    The original indexed children positionally (``member[4][0]`` etc.),
    which silently misreads any xml whose children are ordered differently;
    children are now looked up by tag name.
    """
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        for member in root.findall('object'):
            bbox = member.find('bndbox')
            xml_list.append((
                root.find('filename').text,
                int(size.find('width').text),
                int(size.find('height').text),
                member.find('name').text,
                int(bbox.find('xmin').text),
                int(bbox.find('ymin').text),
                int(bbox.find('xmax').text),
                int(bbox.find('ymax').text),
            ))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)
def main():
    """Convert every xml under the module-level `path` into one csv file."""
    frame = xml_to_csv(path)
    # frame.to_csv('test_labels.csv', index=None)  # alternative output name
    frame.to_csv('NV_12_labelTest.csv', index=None)  # destination csv
    print('Successfully converted xml to csv.')


main()
4.从csv文件生成record文件
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 16 01:04:55 2018
@author: Xiang Guo
由CSV文件生成TFRecord文件
"""
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flags: input csv path and output TFRecord path
# (used when the script is invoked as shown in the Usage docstring above).
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# TO-DO replace this with a label map file
# NOTE: change these labels to your own classes.
_LABEL_MAP = {
    '000-030_000-030': 1,
    '000-030_031-060': 2,
    '000-030_061-090': 3,
    '000-030_091-120': 4,
    '000-030_121-150': 5,
    '000-030_151-180': 6,
    '061-090_000-030': 7,
    '061-090_031-060': 8,
    '061-090_061-090': 9,
    '061-090_091-120': 10,
    # NOTE: the stray '0' is kept on purpose — the xml generator emits this
    # exact (typo'd) label, and the two must stay in sync.
    '061-090_0121-150': 11,
    '061-090_151-180': 12,
}


def class_text_to_int(row_label):
    """Return the integer class id for `row_label`, or None if unknown.

    The original if/elif chain ended with a bare ``None`` expression instead
    of ``return None``; the fall-through behavior (returning None) is now
    explicit via dict lookup.
    """
    return _LABEL_MAP.get(row_label)
def split(df, group):
    """Group `df` rows by column `group`.

    Returns a list of namedtuples (filename=<group key>, object=<sub-frame>),
    one per distinct key, in groupby order.

    The original zipped ``gb.groups.keys()`` with ``gb.groups`` — two
    iterations over the same key sequence — so `filename` and `x` were always
    identical; a single iteration is equivalent and clearer.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(key, gb.get_group(key)) for key in gb.groups]
def create_tf_example(group, path):
    """Build one tf.train.Example for a single image plus all of its boxes.

    `group` is a namedtuple from split(): .filename is the image file name,
    .object is the DataFrame of that image's annotation rows.  Box
    coordinates are normalized by the actual image dimensions.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Read width/height from the bytes just loaded, not from the csv.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs = [], []
    ymins, ymaxs = [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(csv_input, output_path, image_path):
    """Read the labels csv, group rows per image, and write one TFRecord.

    csv_input   — path to the csv produced by xml_to_csv
    output_path — where the .tfrecord file is written
    image_path  — folder holding the .jpg images referenced by the csv
    """
    writer = tf.python_io.TFRecordWriter(output_path)
    examples = pd.read_csv(csv_input)
    for group in split(examples, 'filename'):
        serialized = create_tf_example(group, image_path).SerializeToString()
        writer.write(serialized)
    writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
    # Fill in the three paths: the input csv, the tfrecord file to create,
    # and the folder holding the images.
    # NOTE(review): with the r'' prefix the '\\' stays as two literal
    # backslashes — Windows tolerates doubled separators, but verify these
    # paths actually resolve.
    csv_input = r'G:\TensorflowDectionAPI2\images-NV\\test\NV_12_labelTest.csv'
    output_path = r'G:\TensorflowDectionAPI2\images-NV\\test\NV_test.tfrecord'
    image_path = r'G:\TensorflowDectionAPI2\images-NV\\test_image'
    main(csv_input, output_path, image_path)
注:如果自己打的标签是整型,可能会出现None has type NoneType, but expected one of: int, long的错误,那么只要将自己代码中出现的int类型转换为str类型即可,如下所示
def create_tf_example(group, path):
    """Variant of create_tf_example for labels the csv parsed as integers.

    Identical to the function above except that row['class'] is passed
    through str() before encoding and before the class_text_to_int lookup,
    which avoids the "None has type NoneType, but expected one of: int, long"
    error described in the note above.  This redefinition shadows the earlier
    create_tf_example when both are present in the same module.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    filename = group.filename.encode('utf8')
    # image_format = b'png'
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        # classes_text.append(row['class'])
        # print(type(row['class']))  — the two lines below both add an explicit str() cast
        classes_text.append(str(row['class']).encode('utf8'))
        classes.append(class_text_to_int(str(row['class'])))
    print(classes)
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
对于我自己随机产生数据图片的代码
# Load the Pandas libraries with alias 'pd'
import pandas as pd
# Read data from file 'filename.csv'
# (in the same directory that your python process is based)
# Control delimiters, rows, column names with read_csv (see later)
import random
from math import *
from scipy.special import jv as besselj
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate as integrate
# from PIL import Image
from numpy import sqrt, cos, sin, exp
# Optical constants for the simulated focal field.
n = 1.518                  # presumably the refractive index of the medium — confirm
wl = 532e-3                # wavelength; 532e-3 suggests 532 nm in micrometres — confirm units
NA = 1.4                   # numerical aperture
alpha = np.arcsin(NA / n)  # maximum focusing half-angle (upper integration bound)
an0 = 0                    # lower integration bound
k = (2 * np.pi) / wl       # wave number


def votex_func(x, y, a, b, th, ph, x0, y0):
    """Intensity at point (x, y) of the simulated field, scaled by `a` with
    offset `b`.  `th` and `ph` are given in degrees and converted to radians
    here; (x0, y0) shifts the pattern centre."""
    x = x - x0
    y = y - y0
    th = th / 180 * np.pi
    ph = ph / 180 * np.pi
    # Aperture integral over the focusing angle gives the radial amplitude.
    integrand = lambda theta: (sqrt(cos(theta))) * (sin(2 * theta)) * besselj(
        1, k * sqrt(x ** 2 + y ** 2) * sin(theta))
    radial = integrate.quad(integrand, an0, alpha, limit=1000)[0]
    # Project the radial amplitude onto x/y; the guards avoid 0/0 at the centre.
    field_x = radial * x / np.sqrt(x ** 2 + y ** 2) if x != 0 else 0
    field_y = radial * y / np.sqrt(x ** 2 + y ** 2) if y != 0 else 0
    # Analyzer direction components built from th and ph.
    ax1 = cos(ph) * cos(th)
    ay1 = sin(ph) * cos(th)
    ax2 = -1 * sin(ph)
    ay2 = cos(ph)
    intensity = (abs(field_x * ax1 + field_y * ay1)) ** 2 + (abs(field_x * ax2 + field_y * ay2)) ** 2
    return intensity * a + b
def generateRondom(rows=60, columns=60):
    """Render 50 random focal-field images (rows x columns samples over
    [-0.6, 0.6]^2) and save each as '<th>-<phi>.jpg'.

    th is drawn from [60, 90] and phi from [150, 180] (randint includes both
    endpoints); switch the randint ranges to produce the other angle bands,
    e.g. th in (0, 30) or phi in (0, 180).
    """
    image = np.zeros((rows, columns))
    xaxis = np.linspace(-0.6, 0.6, rows)
    yaxis = np.linspace(-0.6, 0.6, columns)
    # 50 images per class; phi splits into 6 bands and th into 2 used bands
    # (th is never drawn from 30-60).
    # [(0,30),(0,30)], [(0,30)(30-60)], [(0,30),(60,90)], [(0,30),(90,120)], [(0,30),(120,150)], [(0,30),(150,180)]
    # [(60,90),(0,30)], [(60,90)(30-60)], [(60,90),(60,90)], [(60,90),(90,120)], [(60,90),(120,150)], [(60,90),(150,180)]
    # Loop variable renamed from `k`, which shadowed the module-level wave
    # number constant `k` inside this function.
    for _ in range(50):
        th = random.randint(60, 90)    # alternative band: (0, 30)
        phi = random.randint(150, 180)
        for ix in range(rows):
            for iy in range(columns):
                image[ix][iy] = votex_func(x=xaxis[ix], y=yaxis[iy], a=1, b=0, th=th, ph=phi, x0=0., y0=0.)
        plt.imshow(image, cmap='gray')
        plt.savefig("..\images-NV\classfication12\{0}-{1}.jpg".format(th, phi))
        # Renamed from `dict`, which shadowed the builtin; printed text is
        # unchanged since only the value is printed.
        frame_info = {'theta:{0},phi:{1}'.format(th, phi): image}
        print(frame_info)


if __name__ == '__main__':
    generateRondom()