Neural Network Classification: Image Data Preprocessing

import tensorflow as tf
import numpy as np
import os
from random import shuffle
import cv2 as cv

name_dict = {"BF":0,"BK":1,"BL":2,"BR":3,"CF":4,"CL":5,"CV":6,"CXK":7,"S":8,"XF":9}

data_root_path = "color_part_data_processing/"
test_file_path =  "TXT_doc/test.txt"  # test set list file
trainer_file_path = "TXT_doc/trainer.txt"  # training set list file

name_data_list = {}  # maps each class name to the list of its image paths

trainer_list = []
test_list = []

# Store the full image path in the dictionary, keyed by class name
def save_train_test_file(path,name):
    if name not in name_data_list:
        img_list =[]
        img_list.append(path)
        name_data_list[name] = img_list
    else:
        name_data_list[name].append(path)

# Walk the dataset directory and collect image paths grouped by class,
# to be split into training and test sets below
dirs = os.listdir(data_root_path)
for d in dirs:
    full_path = data_root_path + d
    if os.path.isdir(full_path):
        imgs = os.listdir(full_path)  # list all images in the class subdirectory
        for img in imgs:
            save_train_test_file(full_path+ "/" + img, d)

#Write the dictionary contents into the test / training list files
with open(test_file_path, "w") as f:  # truncate the test list file
    pass
with open(trainer_file_path, "w") as f:  # truncate the training list file
    pass

#Iterate over the dictionary and split the data: every 10th image goes to the test set
for name,img_list in name_data_list.items():
    i = 0
    num = len(img_list)
    print(f"{name}:{num}张")
    for img in img_list:
        if i % 10 == 0:
            test_list.append(f"{img}\t{name_dict[name]}\n")
        else:
            trainer_list.append(f"{img}\t{name_dict[name]}\n")
        i += 1
with open(trainer_file_path, "w") as f:
    shuffle(trainer_list)  # shuffle the training samples so classes are interleaved
    f.writelines(trainer_list)

with open(test_file_path,"w") as f:
    f.writelines(test_list)
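
For reference, each line in trainer.txt and test.txt is a tab-separated pair of image path and numeric label. A line would look like the following (the file names are hypothetical; the actual names depend on what is inside color_part_data_processing/):

color_part_data_processing/BF/img_001.jpg	0
color_part_data_processing/CL/img_017.jpg	5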

print("---------------------------------------------------之前的代码主要是生成.txt文件便于找到图片和对应的标签-------------------------------------------------")

def generateds(train_list):
    x, y_ = [], []  # x holds the image data, y_ holds the labels
    with open(train_list, 'r') as f:
        # read all lines, stripping leading/trailing whitespace
        lines = [line.strip() for line in f]
        for line in lines:
            img_path, lab = line.strip().split("\t")
            img = cv.imread(img_path)  # read the image (BGR, uint8)
            img = cv.resize(img, (224, 224))  # resize to the network input size
            # img = np.array(img.convert('L'))  # PIL-style grayscale conversion left over from the original template; not used with cv2
            img = img / 255  # normalize pixel values to [0, 1]
            x.append(img)  # append the normalized image to list x
            y_.append(lab)

    x = np.array(x)
    y_ = np.array(y_)
    y_ = y_.astype(np.int64)
    return x, y_

x_train, y_train = generateds(trainer_file_path)
x_test, y_test = generateds(test_file_path)
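# Quick sanity check (optional sketch, not in the original code): the counts depend
# on how many images the txt files list; 3 channels because cv.imread returns BGR.
# print(x_train.shape, y_train.shape)  # e.g. (N_train, 224, 224, 3) (N_train,)
# print(x_test.shape, y_test.shape)    # e.g. (N_test, 224, 224, 3) (N_test,)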
x_t = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_t = tf.convert_to_tensor(y_train, dtype=tf.int32)
train_dataset = tf.data.Dataset.from_tensor_slices((x_t, y_t))  # build the dataset from the converted tensors
train_dataset = train_dataset.batch(32).repeat(10)  # batch size 32; repeat the training set 10 times (10 epochs' worth)
print("---------------------------------------------网络数据集搭建完成------------------------------------")