目标检测Faster RCNN 批量标记数据集,生成xml,csv以及record文件
0.将png图片转换为jpg
# Step 0: convert every .png in dirname_read into an RGB .jpg in dirname_write.
import os
from PIL import Image

# dirname_read = "path/to/png/folder/"   # note the trailing slash
# dirname_write = "path/to/jpg/output/"
dirname_read="G:\TensorflowDectionAPI2\Azxh_woxuan\image/" # note the trailing slash
dirname_write="G:\TensorflowDectionAPI2\Azxh_woxuan\image_jpg/"

names = os.listdir(dirname_read)
count = 0
for name in names:
    parts = name.split(".")
    # Check the extension BEFORE opening, so non-PNG files are never touched
    # (the original opened every file first, wasting work and crashing on
    # non-image files).
    if parts[-1] != "png":
        continue
    img = Image.open(dirname_read + name)
    # convert("RGB") handles RGBA, greyscale and palette PNGs alike;
    # the original r,g,b,a = img.split() crashed on anything that was
    # not exactly 4-channel.
    img = img.convert("RGB")
    parts[-1] = "jpg"
    out_name = ".".join(parts)
    to_save_path = dirname_write + out_name
    img.save(to_save_path)
    count += 1
    # "count" was misspelled "conut" in the original progress message.
    print(to_save_path, "------count:", count)
1.打乱图片顺序并重新命名
# coding=gbk
import os
import random


class ImageRename():
    """Shuffle the files in a folder and rename the .jpg ones with a
    zero-padded sequence prefix ('00000-<old name>', '00001-<old name>', ...),
    so that training images end up in random order."""

    def __init__(self, path='G:\TensorflowDectionAPI2\images-NV\shuffer'):
        # Folder containing the images. The original hard-coded this path;
        # it is now a parameter whose default preserves the old behavior.
        self.path = path

    def rename(self):
        """Rename every .jpg under self.path in a random order.

        Non-jpg files are listed but left untouched; prints a summary of how
        many files were seen vs. renamed."""
        filelist = os.listdir(self.path)
        random.shuffle(filelist)
        total_num = len(filelist)
        i = 0
        for item in filelist:
            print(item)
            if item.endswith('.jpg'):
                src = os.path.join(os.path.abspath(self.path), item)
                # New name: '00' + 3-digit zero-padded index + '-' + old name.
                dst = os.path.join(os.path.abspath(self.path), '00' + format(str(i), '0>3s') + '-' + item)
                os.rename(src, dst)
                print('converting %s to %s ...' % (src, dst))
                i = i + 1
        print('total %d to rename & converted %d jpgs' % (total_num, i))


if __name__ == '__main__':
    newname = ImageRename()
    newname.rename()
2.批量生成xml文件
我自己的图片命名如下:
#! /usr/bin/python
# -*- coding:UTF-8 -*-
# Step 2: generate one Pascal-VOC style .xml annotation per .jpg, where the
# object class label is derived from the two angles encoded in the file name
# (expected shape '<index>-<th>-<phi>', e.g. '00005-70-130.jpg').
import os, sys
import glob
from PIL import Image

# VEDAI image folder
src_img_dir = "G:\TensorflowDectionAPI2\images-NV\shuffer" #modification
# Output folder for the generated xml files
src_xml_dir = "G:\TensorflowDectionAPI2\images-NV\shuffer_auto"

# Fixed bounding box used for every image (the pattern always sits here).
_XMIN, _YMIN, _XMAX, _YMAX = 150, 67, 510, 410


def _angle_index(th, phi):
    """Map integer angles (th, phi) in degrees to the class label string.

    Returns None when th falls outside the two supported bands (0-30, 61-90)
    or phi is outside 0-180.  The original if/elif chain left `index`
    undefined in that case (NameError on the first image, a stale label on
    later ones) — notably th == 60, which the data generator can produce.
    """
    if 0 <= th <= 30:
        th_tag = "000-030"
    elif 61 <= th <= 90:
        th_tag = "061-090"
    else:
        return None
    for lo, hi, ph_tag in ((0, 30, "000-030"), (31, 60, "031-060"),
                           (61, 90, "061-090"), (91, 120, "091-120"),
                           (121, 150, "121-150"), (151, 180, "151-180")):
        if lo <= phi <= hi:
            label = th_tag + "_" + ph_tag
            # Preserve the historical (typo'd) label so it keeps matching
            # class_text_to_int in the tfrecord generator — do not "fix"
            # one side without the other.
            if label == "061-090_121-150":
                label = "061-090_0121-150"
            return label
    return None


img_Lists = glob.glob(src_img_dir + '/*.jpg')
img_basenames = [os.path.basename(item) for item in img_Lists]  # e.g. 100.jpg
img_names = [os.path.splitext(item)[0] for item in img_basenames]  # e.g. 100

for img in img_names:
    # Derive the class label from the angles embedded in the file name.
    filename_split = img.split('-')
    th = int(filename_split[1])
    phi = int(filename_split[2])
    index = _angle_index(th, phi)
    if index is None:
        print('skip %s: angles (%d, %d) outside supported ranges' % (img, th, phi))
        continue
    im = Image.open((src_img_dir + '/' + img + '.jpg'))
    width, height = im.size
    # `with` guarantees the xml file is flushed and closed
    # (the original never closed its handles).
    with open((src_xml_dir + '/' + img + '.xml'), 'w') as xml_file:
        xml_file.write('<annotation>\n')
        xml_file.write('    <folder>shuffer_auto</folder>\n')
        xml_file.write('    <filename>' + str(img) + '.jpg' + '</filename>\n')
        xml_file.write('    <path>' + src_xml_dir + '/' + str(img) + '.jpg' + '</path>\n')
        xml_file.write('    <source>\n')
        xml_file.write('        <database>' + "Unknow" + '</database>\n')
        xml_file.write('    </source>\n')
        xml_file.write('    <size>\n')
        xml_file.write('        <width>' + str(width) + '</width>\n')
        xml_file.write('        <height>' + str(height) + '</height>\n')
        xml_file.write('        <depth>1</depth>\n')
        xml_file.write('    </size>\n')
        xml_file.write('    <segmented>0</segmented>\n')
        xml_file.write('    <object>\n')
        xml_file.write('        <name>' + index + '</name>\n')  # class label
        xml_file.write('        <pose>Unspecified</pose>\n')
        xml_file.write('        <truncated>0</truncated>\n')
        xml_file.write('        <difficult>0</difficult>\n')
        xml_file.write('        <bndbox>\n')
        xml_file.write('            <xmin>' + str(_XMIN) + '</xmin>\n')
        xml_file.write('            <ymin>' + str(_YMIN) + '</ymin>\n')
        xml_file.write('            <xmax>' + str(_XMAX) + '</xmax>\n')
        xml_file.write('            <ymax>' + str(_YMAX) + '</ymax>\n')
        xml_file.write('        </bndbox>\n')
        xml_file.write('    </object>\n')
        xml_file.write('</annotation>')
写出来的效果图
对于xml文档,我的理解是:<size>标签是关于图片本身的,<object>标签是关于标注(label)的,depth是关于颜色的通道数
参考链接:目标检测xml文档理解
3.从xml文件生成csv文件
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
# Run this once for the train folder and once for the test folder — each run
# produces one csv containing the info from every xml in that folder.
os.chdir('G:\TensorflowDectionAPI2\images-NV\\test') # folder holding the xml files
path = 'G:\TensorflowDectionAPI2\images-NV\\test'
def xml_to_csv(path):
    """Parse every Pascal-VOC .xml under `path` into a pandas DataFrame.

    One row per <object> element, with columns
    [filename, width, height, class, xmin, ymin, xmax, ymax].

    The original indexed children positionally (``member[4][0]`` etc.),
    which silently misreads any xml whose children are ordered differently;
    children are now looked up by tag name.
    """
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        for member in root.findall('object'):
            bbox = member.find('bndbox')
            xml_list.append((
                root.find('filename').text,
                int(size.find('width').text),
                int(size.find('height').text),
                member.find('name').text,
                int(bbox.find('xmin').text),
                int(bbox.find('ymin').text),
                int(bbox.find('xmax').text),
                int(bbox.find('ymax').text),
            ))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)
def main():
    """Convert every xml under the module-level `path` into one csv file."""
    frame = xml_to_csv(path)
    # frame.to_csv('test_labels.csv', index=None)  # alternative output name
    frame.to_csv('NV_12_labelTest.csv', index=None)  # destination csv
    print('Successfully converted xml to csv.')


main()
4.从csv文件生成record文件
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 16 01:04:55 2018
@author: Xiang Guo
由CSV文件生成TFRecord文件
"""
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Command-line flags: input csv path and output TFRecord path
# (used when the script is invoked as shown in the Usage docstring above).
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# TO-DO replace this with a label map file
# NOTE: change these labels to your own classes.
_LABEL_MAP = {
    '000-030_000-030': 1,
    '000-030_031-060': 2,
    '000-030_061-090': 3,
    '000-030_091-120': 4,
    '000-030_121-150': 5,
    '000-030_151-180': 6,
    '061-090_000-030': 7,
    '061-090_031-060': 8,
    '061-090_061-090': 9,
    '061-090_091-120': 10,
    # NOTE: the stray '0' is kept on purpose — the xml generator emits this
    # exact (typo'd) label, and the two must stay in sync.
    '061-090_0121-150': 11,
    '061-090_151-180': 12,
}


def class_text_to_int(row_label):
    """Return the integer class id for `row_label`, or None if unknown.

    The original if/elif chain ended with a bare ``None`` expression instead
    of ``return None``; the fall-through behavior (returning None) is now
    explicit via dict lookup.
    """
    return _LABEL_MAP.get(row_label)
def split(df, group):
    """Group `df` rows by column `group`.

    Returns a list of namedtuples (filename=<group key>, object=<sub-frame>),
    one per distinct key, in groupby order.

    The original zipped ``gb.groups.keys()`` with ``gb.groups`` — two
    iterations over the same key sequence — so `filename` and `x` were always
    identical; a single iteration is equivalent and clearer.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(key, gb.get_group(key)) for key in gb.groups]
def create_tf_example(group, path):
    """Build one tf.train.Example for a single image plus all of its boxes.

    `group` is a namedtuple from split(): .filename is the image file name,
    .object is the DataFrame of that image's annotation rows.  Box
    coordinates are normalized by the actual image dimensions.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Read width/height from the bytes just loaded, not from the csv.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs = [], []
    ymins, ymaxs = [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(csv_input, output_path, image_path):
    """Read the labels csv, group rows per image, and write one TFRecord.

    csv_input   — path to the csv produced by xml_to_csv
    output_path — where the .tfrecord file is written
    image_path  — folder holding the .jpg images referenced by the csv
    """
    writer = tf.python_io.TFRecordWriter(output_path)
    examples = pd.read_csv(csv_input)
    for group in split(examples, 'filename'):
        serialized = create_tf_example(group, image_path).SerializeToString()
        writer.write(serialized)
    writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
if __name__ == '__main__':
    # Fill in the three paths: the input csv, the tfrecord file to create,
    # and the folder holding the images.
    # NOTE(review): with the r'' prefix the '\\' stays as two literal
    # backslashes — Windows tolerates doubled separators, but verify these
    # paths actually resolve.
    csv_input = r'G:\TensorflowDectionAPI2\images-NV\\test\NV_12_labelTest.csv'
    output_path = r'G:\TensorflowDectionAPI2\images-NV\\test\NV_test.tfrecord'
    image_path = r'G:\TensorflowDectionAPI2\images-NV\\test_image'
    main(csv_input, output_path, image_path)
注:如果自己打的标签是整型,可能会出现None has type NoneType, but expected one of: int, long的错误,那么只要将自己代码中出现的int类型转换为str类型即可,如下所示
def create_tf_example(group, path):
    """Variant of create_tf_example for labels the csv parsed as integers.

    Identical to the function above except that row['class'] is passed
    through str() before encoding and before the class_text_to_int lookup,
    which avoids the "None has type NoneType, but expected one of: int, long"
    error described in the note above.  This redefinition shadows the earlier
    create_tf_example when both are present in the same module.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    filename = group.filename.encode('utf8')
    # image_format = b'png'
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        # classes_text.append(row['class'])
        # print(type(row['class']))  — the two lines below both add an explicit str() cast
        classes_text.append(str(row['class']).encode('utf8'))
        classes.append(class_text_to_int(str(row['class'])))
    print(classes)
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
对于我自己随机产生数据图片的代码
# Load the Pandas libraries with alias 'pd'
import pandas as pd
# Read data from file 'filename.csv'
# (in the same directory that your python process is based)
# Control delimiters, rows, column names with read_csv (see later)
import random
from math import *
from scipy.special import jv as besselj
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate as integrate
# from PIL import Image
from numpy import sqrt, cos, sin, exp
# Optical constants for the simulated focal field.
n = 1.518                  # presumably the refractive index of the medium — confirm
wl = 532e-3                # wavelength; 532e-3 suggests 532 nm in micrometres — confirm units
NA = 1.4                   # numerical aperture
alpha = np.arcsin(NA / n)  # maximum focusing half-angle (upper integration bound)
an0 = 0                    # lower integration bound
k = (2 * np.pi) / wl       # wave number


def votex_func(x, y, a, b, th, ph, x0, y0):
    """Intensity at point (x, y) of the simulated field, scaled by `a` with
    offset `b`.  `th` and `ph` are given in degrees and converted to radians
    here; (x0, y0) shifts the pattern centre."""
    x = x - x0
    y = y - y0
    th = th / 180 * np.pi
    ph = ph / 180 * np.pi
    # Aperture integral over the focusing angle gives the radial amplitude.
    integrand = lambda theta: (sqrt(cos(theta))) * (sin(2 * theta)) * besselj(
        1, k * sqrt(x ** 2 + y ** 2) * sin(theta))
    radial = integrate.quad(integrand, an0, alpha, limit=1000)[0]
    # Project the radial amplitude onto x/y; the guards avoid 0/0 at the centre.
    field_x = radial * x / np.sqrt(x ** 2 + y ** 2) if x != 0 else 0
    field_y = radial * y / np.sqrt(x ** 2 + y ** 2) if y != 0 else 0
    # Analyzer direction components built from th and ph.
    ax1 = cos(ph) * cos(th)
    ay1 = sin(ph) * cos(th)
    ax2 = -1 * sin(ph)
    ay2 = cos(ph)
    intensity = (abs(field_x * ax1 + field_y * ay1)) ** 2 + (abs(field_x * ax2 + field_y * ay2)) ** 2
    return intensity * a + b
def generateRondom(rows=60, columns=60):
    """Render 50 random focal-field images (rows x columns samples over
    [-0.6, 0.6]^2) and save each as '<th>-<phi>.jpg'.

    th is drawn from [60, 90] and phi from [150, 180] (randint includes both
    endpoints); switch the randint ranges to produce the other angle bands,
    e.g. th in (0, 30) or phi in (0, 180).
    """
    image = np.zeros((rows, columns))
    xaxis = np.linspace(-0.6, 0.6, rows)
    yaxis = np.linspace(-0.6, 0.6, columns)
    # 50 images per class; phi splits into 6 bands and th into 2 used bands
    # (th is never drawn from 30-60).
    # [(0,30),(0,30)], [(0,30)(30-60)], [(0,30),(60,90)], [(0,30),(90,120)], [(0,30),(120,150)], [(0,30),(150,180)]
    # [(60,90),(0,30)], [(60,90)(30-60)], [(60,90),(60,90)], [(60,90),(90,120)], [(60,90),(120,150)], [(60,90),(150,180)]
    # Loop variable renamed from `k`, which shadowed the module-level wave
    # number constant `k` inside this function.
    for _ in range(50):
        th = random.randint(60, 90)    # alternative band: (0, 30)
        phi = random.randint(150, 180)
        for ix in range(rows):
            for iy in range(columns):
                image[ix][iy] = votex_func(x=xaxis[ix], y=yaxis[iy], a=1, b=0, th=th, ph=phi, x0=0., y0=0.)
        plt.imshow(image, cmap='gray')
        plt.savefig("..\images-NV\classfication12\{0}-{1}.jpg".format(th, phi))
        # Renamed from `dict`, which shadowed the builtin; printed text is
        # unchanged since only the value is printed.
        frame_info = {'theta:{0},phi:{1}'.format(th, phi): image}
        print(frame_info)


if __name__ == '__main__':
    generateRondom()