import tensorflow as tf
import numpy as np
import os
from random import shuffle
import cv2 as cv
# Category code -> integer class label (10 classes).
name_dict = {code: label for label, code in
             enumerate(("BF", "BK", "BL", "BR", "CF",
                        "CL", "CV", "CXK", "S", "XF"))}
data_root_path = "color_part_data_processing/"   # data-set root directory
test_file_path = "TXT_doc/test.txt"              # test-set listing file
trainer_file_path = "TXT_doc/trainer.txt"        # training-set listing file
name_data_list = {}   # category name -> list of image paths (train + test)
trainer_list = []     # "<path>\t<label>\n" lines for the training listing
test_list = []        # "<path>\t<label>\n" lines for the test listing
def save_train_test_file(path, name):
    """Record one image's full path under its category in ``name_data_list``.

    Args:
        path: full path of a single image file.
        name: category (sub-directory) name the image belongs to.
    """
    # setdefault creates the per-category list on first sight of `name`,
    # replacing the original manual membership check + append dance.
    name_data_list.setdefault(name, []).append(path)
# Walk the data-set root: every sub-directory is one category, every file
# inside it one sample. Collect the image paths per category.
for entry in os.listdir(data_root_path):
    category_dir = data_root_path + entry
    if not os.path.isdir(category_dir):
        continue
    for file_name in os.listdir(category_dir):  # every image in the category
        save_train_test_file(category_dir + "/" + file_name, entry)

# Truncate both listing files so no stale content survives a re-run.
for listing in (test_file_path, trainer_file_path):
    with open(listing, "w"):
        pass
# Split each category 1:9 — every 10th image (index 0, 10, 20, ...) goes to
# the test set, the rest to the training set.
for name, img_list in name_data_list.items():
    print(f"{name}:{len(img_list)}张")
    # enumerate replaces the original hand-rolled `i = 0 ... i += 1` counter.
    for i, img in enumerate(img_list):
        if i % 10 == 0:
            test_list.append(f"{img}\t{name_dict[name]}\n")
        else:
            trainer_list.append(f"{img}\t{name_dict[name]}\n")

# Shuffle the training lines so batches are not grouped by category,
# then persist both listings.
with open(trainer_file_path, "w") as f:
    shuffle(trainer_list)
    f.writelines(trainer_list)
with open(test_file_path, "w") as f:
    f.writelines(test_list)
print("---------------------------------------------------之前的代码主要是生成.txt文件便于找到图片和对应的标签-------------------------------------------------")
def generateds(train_list):
    """Load images and integer labels from a tab-separated listing file.

    Each non-blank line of the file is "<image_path>\t<label>". Every image
    is read with OpenCV, resized to 224x224 and scaled into [0, 1].

    Args:
        train_list: path of the listing file (training or test set).

    Returns:
        (x, y_): x is a float ndarray of the stacked normalized images,
        y_ is an int64 ndarray of the corresponding labels.

    Raises:
        FileNotFoundError: if an image on a listed path cannot be read.
    """
    x, y_ = [], []  # x: image data, y_: labels
    with open(train_list, "r") as f:
        lines = [line.strip() for line in f]  # already stripped here once
    for line in lines:
        if not line:  # tolerate blank lines in the listing
            continue
        img_path, lab = line.split("\t")
        img = cv.imread(img_path)
        if img is None:  # cv.imread signals failure by returning None
            raise FileNotFoundError(f"cannot read image: {img_path}")
        img = cv.resize(img, (224, 224))  # rescale to the network input size
        img = img / 255  # normalize pixel values to [0, 1]
        x.append(img)
        y_.append(lab)
    x = np.array(x)
    y_ = np.array(y_).astype(np.int64)
    return x, y_
# Materialize both splits from the listing files generated above.
x_train, y_train = generateds(trainer_file_path)
x_test, y_test = generateds(test_file_path)

# NOTE(review): x_t / y_t are never used below — presumably kept for a later
# step; confirm before removing.
x_t = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_t = tf.convert_to_tensor(y_train, dtype=tf.int32)

# Dataset object: batches of 32, whole training set repeated 10 times.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32).repeat(10)
print("---------------------------------------------网络数据集搭建完成------------------------------------")
# Neural-network classification: preliminary data processing.
# (Originally published 2024-07-26 17:36:26.)