tensorflow2视力表E字符(学习笔记)--制作自己的数据集

最新推荐文章于 2023-03-02 19:55:40 发布

菜刀l四庭柱

最新推荐文章于 2023-03-02 19:55:40 发布

阅读量603

点赞数 1

文章标签： python

本文链接：https://blog.csdn.net/qq_51729332/article/details/117267269

版权

本文介绍了如何使用Python库进行图像数据增强，包括大小调整、噪声注入和随机翻转旋转，以创建更丰富的训练集。同时，通过labelimg软件进行图像标注，生成XML文件，为深度学习模型的训练准备数据。整个过程涵盖了图像预处理和数据集构建的关键步骤。

摘要由CSDN通过智能技术生成

网上收集E字符图片，完成数据扩充
下图是我收集的图片
在这里插入图片描述

导入所需的库(后文的旋转增强用到了 tfa，还需要安装并导入 tensorflow_addons：import tensorflow_addons as tfa):

import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt
import os

定义噪声函数(函数名虽然叫 salt_and_pepper_noise，但实际添加的是高斯噪声):

def salt_and_pepper_noise(X, noise_factor=0.5):  # add noise
    """Return a noisy copy of an image whose pixel values are on a 0..255 scale.

    Despite the name, the noise added here is additive Gaussian noise drawn
    with ``np.random.normal`` (mean 0, standard deviation 1), not
    salt-and-pepper noise.

    Args:
        X: Image data that supports ``/ 255`` and exposes ``.shape``
            (for example a NumPy array).
        noise_factor: Multiplier applied to the unit-variance noise after the
            image has been scaled to 0..1. The default of 0.5 is the value
            that used to be hard-coded; 0 returns the (clipped) input.

    Returns:
        The noisy image, clipped to the valid range and scaled back to
        0..255, with the same shape as ``X``.
    """
    scaled = X / 255
    noise = np.random.normal(loc=0.0, scale=1.0, size=X.shape)
    # Clip on the 0..1 scale so the result never leaves the valid pixel range.
    noisy = np.clip(scaled + noise_factor * noise, 0., 1.)
    return noisy * 255

定义修改图片的函数：在其中调用噪声函数，图片修改完成后保存:

def resize_function(img_path,save_path,string,num_scales=4):  # load image, resize, save
    """Write rescaled (and partly noised) JPEG variants of one image.

    For ``x`` in ``1..num_scales`` the source image is resized to ``0.5 * x``
    times its original height and width and written to
    ``<save_path><string>.<x>.jpg``. Variants with an even ``x`` are also
    passed through ``salt_and_pepper_noise`` before being saved.

    Args:
        img_path: Path of the image file to read (decoded with
            ``tf.image.decode_jpeg``).
        save_path: Output prefix. It is concatenated directly with ``string``,
            so a directory must end with a path separator.
        string: Label placed at the start of every output file name.
        num_scales: Number of variants to write. The default of 4 reproduces
            the original ``range(1, 5)``.
    """
    raw = tf.io.read_file(img_path)
    print(img_path)
    # Decode into a 3-channel tensor.
    # raw could alternatively be decoded with tf.image.decode_png for PNG input.
    original = tf.image.decode_jpeg(raw, channels=3)
    hight = original.shape[0]
    width = original.shape[1]
    prefix = save_path + string + '.'  # e.g. D:/save_image/down.
    print('original shape:', original.shape)
    for x in range(1, num_scales + 1):
        save_to_path = prefix + str(x) + '.jpg'
        # Always resize from the untouched original so the scale factors do
        # not compound from one variant to the next. max(1, ...) keeps very
        # small images from requesting a zero-sized output.
        new_size = [max(1, int(hight * 0.5 * x)), max(1, int(width * 0.5 * x))]
        img = tf.image.resize(original, new_size)
        if x % 2 == 0:
            # Only the even-numbered variants get noise added.
            img = salt_and_pepper_noise(img)
        print('resized shape:', img.shape)
        # Convert to uint8 so the data can be JPEG-encoded.
        img = tf.cast(img, dtype=tf.uint8)
        encoded = tf.image.encode_jpeg(img)
        # Write the encoded bytes to disk.
        with tf.io.gfile.GFile(save_to_path, 'wb') as file:
            file.write(encoded.numpy())

读取图片。根据自身需求可以多下载一些E字符图片(我太懒了)；把上方代码 for x in range(1,5,1) 中的 5 改为更大的值也可以增加数据量，当然这样变化的只有图片大小(我要的数据集和这还差一些，所以16张我就够了):

# Directory holding the collected source images (file names start with
# up/down/left/right) and the prefix under which augmented copies are written.
my_path='D:/load_image/'
save_path='D:/save_image/'


def load_datas(path):
    """Group the images in ``path`` by direction label and augment each one.

    The label is the part of the file name before the first ``.`` and must be
    one of ``up``, ``down``, ``left`` or ``right``. Anything else (including
    sub-directories) is reported and skipped instead of being silently
    treated as ``right``.

    Every accepted file is handed to ``resize_function`` together with the
    module-level ``save_path``. The first image of a label uses the plain
    label as its output tag; further images of the same label get
    ``<label>.<index>`` so that their output files do not overwrite each
    other.

    Args:
        path: Directory to scan.
    """
    labels = ('up', 'down', 'left', 'right')
    grouped = {label: [] for label in labels}
    # sorted() makes the processing order (and thus the index suffixes)
    # reproducible; os.listdir returns entries in arbitrary order.
    for file in sorted(os.listdir(path)):
        full_path = os.path.join(path, file)
        label = file.split(sep='.')[0]
        if label not in grouped or not os.path.isfile(full_path):
            print('skipped (not an up/down/left/right image file):', full_path)
            continue
        grouped[label].append(full_path)
    for label in labels:
        for index, path_img in enumerate(grouped[label]):
            tag = label if index == 0 else label + '.' + str(index)
            resize_function(path_img, save_path, tag)

最后运行

# Run the resize/noise augmentation over the images found under my_path;
# the generated files are written using the module-level save_path prefix.
load_datas(my_path)

得到
在这里插入图片描述

最后制作:
在这里插入图片描述
(由于技术有限上图采用word文档制作，小声bb…)
然后对上图进行数据增强：随机调节亮度、随机翻转，并旋转一定的角度:

def random_turn_save(x,save_path,num):
    """Save randomly augmented, rotated JPEG copies of one image.

    One copy is written for each angle in ``np.arange(0.0, 0.3, 0.07)``
    (passed to ``tfa.image.rotate``, which takes radians). Each copy gets a
    random brightness shift and random left/right and up/down flips before
    being rotated, and is saved as ``<save_path>E.<num>.jpg``.

    Args:
        x: Image tensor to augment. Assumes pixel values on a 0..255 scale,
            as produced by ``Read_file`` -- confirm for other callers.
        save_path: Output prefix; concatenated directly with the file name,
            so a directory must end with a path separator.
        num: Index used for the first file written; incremented per file.

    Returns:
        The next unused index, so calls can be chained without name clashes.
    """
    # `tfa` is not imported anywhere in the visible code, so import it here
    # to keep the function from failing with NameError.
    import tensorflow_addons as tfa

    for rad in np.arange(0.0,0.3,0.07):
        save_to_path = save_path + 'E.' + str(num) + '.jpg'
        image = tf.image.random_brightness(x, max_delta=3)  # random brightness
        image = tf.image.random_flip_left_right(image)  # random horizontal flip
        image = tf.image.random_flip_up_down(image)  # random vertical flip
        image = tfa.image.rotate(image, rad)  # rotate by the current angle
        # The brightness shift can push values outside 0..255, and a plain
        # cast to uint8 does not saturate, so clamp before converting.
        image = tf.clip_by_value(tf.cast(image, tf.float32), 0.0, 255.0)
        image = tf.cast(image, dtype=tf.uint8)
        encoded = tf.image.encode_jpeg(image)
        with tf.io.gfile.GFile(save_to_path, 'wb') as file:
            file.write(encoded.numpy())
        num += 1
    return num


def Read_file(path):
    """Load the image at ``path`` as a 256x256, 3-channel float32 tensor.

    The file is decoded with ``tf.image.decode_jpeg``; no value
    normalisation is applied by this function.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (256, 256))
    return tf.cast(resized, tf.float32)

# Augment every file in `path`, numbering the generated images consecutively
# across all source files via the counter returned by random_turn_save.
# NOTE(review): `path` is not assigned anywhere in the visible code -- it
# presumably should be the directory holding the composed chart images and
# must end with a path separator; confirm before running.
num = 0
for file in os.listdir(path):
    num = random_turn_save(Read_file(path + file), save_path, num)