import tensorflow as tf
import numpy as np
import os
from random import shuffle
import cv2 as cv
# Category code -> integer class label (10 classes).
name_dict = {code: label for label, code in
             enumerate(("BF", "BK", "BL", "BR", "CF",
                        "CL", "CV", "CXK", "S", "XF"))}
data_root_path = "color_part_data_processing/"   # data-set root directory
test_file_path = "TXT_doc/test.txt"              # test-set listing file
trainer_file_path = "TXT_doc/trainer.txt"        # training-set listing file
name_data_list = {}   # category name -> list of image paths (train + test)
trainer_list = []     # "<path>\t<label>\n" lines for the training listing
test_list = []        # "<path>\t<label>\n" lines for the test listing
def save_train_test_file(path, name):
    """Record one image's full path under its category in ``name_data_list``.

    Args:
        path: full path of a single image file.
        name: category (sub-directory) name the image belongs to.
    """
    # setdefault creates the per-category list on first sight of `name`,
    # replacing the original manual membership check + append dance.
    name_data_list.setdefault(name, []).append(path)
# Walk the data-set root: every sub-directory is one category, every file
# inside it one sample. Collect the image paths per category.
for entry in os.listdir(data_root_path):
    category_dir = data_root_path + entry
    if not os.path.isdir(category_dir):
        continue
    for file_name in os.listdir(category_dir):  # every image in the category
        save_train_test_file(category_dir + "/" + file_name, entry)

# Truncate both listing files so no stale content survives a re-run.
for listing in (test_file_path, trainer_file_path):
    with open(listing, "w"):
        pass
# Split each category 1:9 — every 10th image (index 0, 10, 20, ...) goes to
# the test set, the rest to the training set.
for name, img_list in name_data_list.items():
    print(f"{name}:{len(img_list)}张")
    # enumerate replaces the original hand-rolled `i = 0 ... i += 1` counter.
    for i, img in enumerate(img_list):
        if i % 10 == 0:
            test_list.append(f"{img}\t{name_dict[name]}\n")
        else:
            trainer_list.append(f"{img}\t{name_dict[name]}\n")

# Shuffle the training lines so batches are not grouped by category,
# then persist both listings.
with open(trainer_file_path, "w") as f:
    shuffle(trainer_list)
    f.writelines(trainer_list)
with open(test_file_path, "w") as f:
    f.writelines(test_list)
print("---------------------------------------------------之前的代码主要是生成.txt文件便于找到图片和对应的标签-------------------------------------------------")
def generateds(train_list):
    """Load images and integer labels from a tab-separated listing file.

    Each non-blank line of the file is "<image_path>\t<label>". Every image
    is read with OpenCV, resized to 224x224 and scaled into [0, 1].

    Args:
        train_list: path of the listing file (training or test set).

    Returns:
        (x, y_): x is a float ndarray of the stacked normalized images,
        y_ is an int64 ndarray of the corresponding labels.

    Raises:
        FileNotFoundError: if an image on a listed path cannot be read.
    """
    x, y_ = [], []  # x: image data, y_: labels
    with open(train_list, "r") as f:
        lines = [line.strip() for line in f]  # already stripped here once
    for line in lines:
        if not line:  # tolerate blank lines in the listing
            continue
        img_path, lab = line.split("\t")
        img = cv.imread(img_path)
        if img is None:  # cv.imread signals failure by returning None
            raise FileNotFoundError(f"cannot read image: {img_path}")
        img = cv.resize(img, (224, 224))  # rescale to the network input size
        img = img / 255  # normalize pixel values to [0, 1]
        x.append(img)
        y_.append(lab)
    x = np.array(x)
    y_ = np.array(y_).astype(np.int64)
    return x, y_
# Materialize both splits from the listing files generated above.
x_train, y_train = generateds(trainer_file_path)
x_test, y_test = generateds(test_file_path)

# NOTE(review): x_t / y_t are never used below — presumably kept for a later
# step; confirm before removing.
x_t = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_t = tf.convert_to_tensor(y_train, dtype=tf.int32)

# Dataset object: batches of 32, whole training set repeated 10 times.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32).repeat(10)
print("---------------------------------------------网络数据集搭建完成------------------------------------")
# Neural-network classification: preliminary data processing.
# (Originally published 2024-07-26 17:36:26.)