## 前言
这块是数据处理的第二部分,关于图片处理和多线程的部分,让我们一起来看看吧。
get_training_data()
def get_training_data():
    """Collect the paths of all training images under ``DATA_FOLDER/image``.

    The directory tree is walked recursively with ``os.walk``. A file is kept
    when its name ends with one of the accepted image suffixes. The comparison
    is case-insensitive, so ``.PNG`` or ``.Jpeg`` files are found as well as
    ``.JPG`` ones.

    Returns:
        list: Paths (parent directory joined with the file name) of every
        matching file, in ``os.walk`` order.
    """
    # str.endswith accepts a tuple of suffixes, so a single call replaces the
    # per-suffix loop. Suffixes are lower-case because the name is lowered.
    exts = ('jpg', 'png', 'jpeg')
    img_files = []
    for parent, _dirnames, filenames in os.walk(os.path.join(DATA_FOLDER, "image")):
        for filename in filenames:
            if filename.lower().endswith(exts):
                img_files.append(os.path.join(parent, filename))
    print('Find {} images'.format(len(img_files)))
    return img_files
这块是判断图片的后缀是否为规定格式,如果是就将图片路径返回。
load_annoataion()
def load_annoataion(p):
    """Read axis-aligned boxes from a comma-separated annotation file.

    Every non-blank line must hold exactly four integers in the order
    ``x_min,y_min,x_max,y_max``.

    Args:
        p: Path of the annotation text file.

    Returns:
        list: One ``[x_min, y_min, x_max, y_max, 1]`` entry per annotated
        line, in file order. The trailing ``1`` is appended to every box.

    Raises:
        ValueError: If a non-blank line does not consist of exactly four
            integer fields.
    """
    bbox = []
    with open(p, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (typically a trailing newline at the end of
                # the file) carries no box and must not abort the whole load.
                continue
            x_min, y_min, x_max, y_max = map(int, line.split(","))
            bbox.append([x_min, y_min, x_max, y_max, 1])
    return bbox
f.readlines() 读取到的每一行数据都包含换行符;line.strip().split(",") 先去掉行首尾的空白和换行符,再按逗号把一行切分成四个坐标值,转换为整数后连同末尾的标记 1 一起作为一个 bbox 加入列表,最后将整个列表返回。
generator()
def generator(vis=False):
    """Endlessly yield training samples in a freshly shuffled order per pass.

    The image list comes from ``get_training_data()``. For each image the
    matching annotation file is ``DATA_FOLDER/label/<image stem>.txt``.
    Images that cannot be read, have no annotation file, or have an empty
    annotation file are reported with ``print`` and skipped.

    Args:
        vis: When true, draw every box on the image and show it with
            matplotlib before yielding. The boxes are drawn onto the same
            array that is yielded.

    Yields:
        tuple: ``([im], bbox, im_info)`` where ``im`` is the array returned
        by ``cv2.imread``, ``bbox`` is the list from ``load_annoataion`` and
        ``im_info`` is a ``(1, 3)`` array holding ``[h, w, c]``.

    Raises:
        ValueError: If no training image is found. Without this guard the
            outer loop would spin forever without yielding anything.
    """
    image_list = np.array(get_training_data())
    print('{} training images in {}'.format(image_list.shape[0], DATA_FOLDER))
    if image_list.shape[0] == 0:
        raise ValueError('No training images found in {}'.format(DATA_FOLDER))
    index = np.arange(0, image_list.shape[0])
    while True:
        np.random.shuffle(index)
        for i in index:
            # Best-effort loading: one broken sample must not stop training,
            # so any error is printed and the sample is skipped.
            try:
                im_fn = image_list[i]
                im = cv2.imread(im_fn)
                if im is None:
                    # cv2.imread signals failure by returning None, not by raising.
                    print("Image {} could not be read!".format(im_fn))
                    continue
                h, w, c = im.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                # Derive the label path from the image file name without extension.
                _, fn = os.path.split(im_fn)
                fn, _ = os.path.splitext(fn)
                txt_fn = os.path.join(DATA_FOLDER, "label", fn + '.txt')
                if not os.path.exists(txt_fn):
                    print("Ground truth for image {} not exist!".format(im_fn))
                    continue
                bbox = load_annoataion(txt_fn)
                if not bbox:
                    print("Ground truth for image {} empty!".format(im_fn))
                    continue

                if vis:
                    for p in bbox:
                        cv2.rectangle(im, (p[0], p[1]), (p[2], p[3]), color=(0, 0, 255), thickness=1)
                    fig, axs = plt.subplots(1, 1, figsize=(30, 30))
                    # Reverse the channel axis for display
                    # (presumably BGR -> RGB; confirm against cv2.imread's channel order).
                    axs.imshow(im[:, :, ::-1])
                    axs.set_xticks([])
                    axs.set_yticks([])
                    plt.tight_layout()
                    plt.show()
                    plt.close()
                yield [im], bbox, im_info
            except Exception as e:
                print(e)
                continue
调用前面的函数获取图片列表并逐张读取,vis 表示是否展示处理效果。先通过 cv2 将图片读取进来,然后把图片的 shape(高、宽、通道数)重组为 1×3 的 im_info;接着从图片路径中分离出文件名和扩展名,拼接出对应的 txt 标签文件路径,判断该文件是否存在,再将文本框载入。cv2.rectangle 可以在图片上画出每个文本框,后面的代码就是将处理的结果显示出来。
get_batch()
def get_batch(num_workers, **kwargs):
    """Yield samples produced by ``generator`` through a ``GeneratorEnqueuer``.

    Args:
        num_workers: Number of workers passed to ``enqueuer.start``.
        **kwargs: Forwarded unchanged to ``generator`` (e.g. ``vis``).

    Yields:
        The next item taken from the enqueuer's queue. If the enqueuer is no
        longer running, ``None`` is yielded instead.
        NOTE(review): once the enqueuer has stopped this keeps yielding
        ``None`` indefinitely; confirm callers expect that.
    """
    # Bind the name before the try block: if the constructor raises, the
    # finally clause must not fail with NameError and hide the real error.
    enqueuer = None
    try:
        enqueuer = GeneratorEnqueuer(generator(**kwargs), use_multiprocessing=True)
        enqueuer.start(max_queue_size=24, workers=num_workers)
        generator_output = None
        while True:
            # Poll the queue until an item is available or the enqueuer stops.
            while enqueuer.is_running():
                if not enqueuer.queue.empty():
                    generator_output = enqueuer.queue.get()
                    break
                else:
                    time.sleep(0.01)
            yield generator_output
            generator_output = None
    finally:
        # Runs on errors and when the consumer closes this generator.
        if enqueuer is not None:
            enqueuer.stop()
这块是多线程处理,本人还不是特别了解就不去做解读,推荐一位大神的博文https://blog.csdn.net/qq_35307005/article/details/89929403?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-16&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-16。
写在后面
这里就是图片处理及展示的全部部分了。这里写一下代码路径,方便大家找,以前忘了这回事……utils/dataset(data_provider)。如有错误还望指正,在此向各位前辈致以诚挚谢意。