一、单通道数据导入以及数据集划分
main
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader
# Single-channel pipeline: load the workbook, split it into train/validation/
# test subsets, convert every subset to tensors and wrap them in DataLoaders.
#
# NOTE(review): open_excel, split_data_set and inputtotensor are not imported
# above; they appear under the "dataset" heading of this listing, so they
# presumably have to be imported from that module -- confirm.

# Load the feature matrix and the label vector (data file location).
feature, label = open_excel('D:xxx文件位置')
split = 0.1  # fraction of the whole data set used for testing
split_val = 0.2  # fraction of the whole data set used for validation
# 1 = shuffle the data set, 0 = keep the original order.
# NOTE(review): this flag is never passed to split_data_set below.
ifshuffle = 1

# Split the data set:
#   x_train / y_train -- training samples / labels
#   x_valT  / y_valT  -- validation samples / labels
#   x_test  / y_test  -- test samples / labels
x_train, x_valT, x_test, y_train, y_valT, y_test = split_data_set(
    feature, label, split, split_val
)

# Convert every subset to tensors.
x_tr, y_tr = inputtotensor(x_train, y_train)  # training samples and labels
# Bound as x_vl / y_vl because that is the name val_dataset reads below;
# the previous binding (x_val / y_val) made the next statement fail with a
# NameError.
x_vl, y_vl = inputtotensor(x_valT, y_valT)  # validation samples and labels
x_ts, y_ts = inputtotensor(x_test, y_test)  # test samples and labels

train_dataset = torch.utils.data.TensorDataset(x_tr, y_tr)
val_dataset = torch.utils.data.TensorDataset(x_vl, y_vl)
# Despite its name this is a TensorDataset, not a DataLoader.
test_dataloader = torch.utils.data.TensorDataset(x_ts, y_ts)

# Build the data loaders.
data_loder_train = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
data_loder_val = DataLoader(dataset=val_dataset, batch_size=128, shuffle=True)
data_loder_test = DataLoader(dataset=test_dataloader, batch_size=128, shuffle=False)
dataset
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import numpy
import pandas as pd
import random
def open_excel(filename):
    """Read an Excel workbook and separate features from labels.

    The last column of the sheet is returned as the labels; every other
    column is returned as a feature column.

    :param filename: path of the workbook without the ".xlsx" suffix
        (the suffix is appended here)
    :return: (features converted with ``np.float64``, labels)
    """
    sheet = pd.read_excel(f'{filename}.xlsx', engine='openpyxl')
    # After the transpose each entry of ``columns`` is one sheet column.
    columns = sheet.T.to_numpy()
    features = np.float64(columns[0:-1].T)
    labels = columns[-1]
    return features, labels
def random_number(data_size):
    """Return the integers 0 .. data_size - 1 as a list in random order.

    The order is produced by ``random.shuffle``, so it follows the state of
    the standard-library ``random`` module.
    """
    indices = [*range(data_size)]
    random.shuffle(indices)
    return indices
def split_data_set(data_set, target_set, rate, rate_val, shuffle=True):
    """Split samples and labels into training, validation and test subsets.

    The training subset receives ``floor((1 - rate - rate_val) * n)``
    samples, the validation subset ``floor(rate_val * n)`` samples, and the
    test subset receives every remaining sample.

    :param data_set: samples, indexed along their first axis
    :param target_set: labels, one per sample
    :param rate: fraction of the data used for the test subset
    :param rate_val: fraction of the data used for the validation subset
    :param shuffle: when true (default) the samples are assigned in the
        random order produced by ``random_number``; when false the original
        order is kept
    :return: x_train, x_val, x_test, y_train, y_val, y_test
    :raises ValueError: if the number of labels differs from the number of
        samples, or if the ratios are negative or sum to more than 1
    """
    # Plain Python sequences cannot be indexed with a list of positions,
    # so convert them; arrays and other containers are left untouched.
    if isinstance(data_set, (list, tuple)):
        data_set = np.asarray(data_set)
    if isinstance(target_set, (list, tuple)):
        target_set = np.asarray(target_set)

    total_size = len(data_set)
    if len(target_set) != total_size:
        raise ValueError(
            f"data_set has {total_size} samples but target_set has "
            f"{len(target_set)} labels"
        )
    # Small tolerance so that ratios which sum to 1 up to float error pass.
    if rate < 0 or rate_val < 0 or rate + rate_val > 1 + 1e-9:
        raise ValueError(
            "rate and rate_val must be non-negative and sum to at most 1"
        )

    # Round away binary floating-point noise before truncating; otherwise a
    # product such as 0.29 * 100 == 28.999999999999996 loses one sample.
    train_size = max(0, int(round((1 - (rate + rate_val)) * total_size, 9)))
    val_size = int(round(rate_val * total_size, 9))
    val_end = train_size + val_size

    # Positions of the samples, optionally in random order.
    if shuffle:
        data_index = random_number(total_size)
    else:
        data_index = list(range(total_size))

    train_idx = data_index[:train_size]
    val_idx = data_index[train_size:val_end]
    test_idx = data_index[val_end:]  # everything left over

    # The same positions select both samples (x) and labels (y).
    x_train, y_train = data_set[train_idx], target_set[train_idx]
    x_val, y_val = data_set[val_idx], target_set[val_idx]
    x_test, y_test = data_set[test_idx], target_set[test_idx]
    return x_train, x_val, x_test, y_train, y_val, y_test
def inputtotensor(inputtensor, labeltensor):
    """Convert samples and labels into PyTorch tensors.

    :param inputtensor: samples; passed through ``np.array`` and wrapped in
        a ``torch.FloatTensor``
    :param labeltensor: labels; passed through ``np.array``, cast to float
        and wrapped in a ``torch.LongTensor``
    :return: (sample tensor, label tensor)
    """
    features = torch.FloatTensor(np.array(inputtensor))
    # Labels are cast to float by NumPy before the LongTensor is built.
    label_values = np.array(labeltensor).astype(float)
    labels = torch.LongTensor(label_values)
    return features, labels
二、多通道数据导入以及数据集划分
main
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader
# Two-channel pipeline: load both channels, split them with a shared index
# order, concatenate the channels into one tensor per subset and wrap the
# subsets in DataLoaders.
#
# NOTE(review): open_excel, split_data_set_TWO and inputtotensor_two are not
# imported above; they appear under the "dataset" heading of this listing, so
# they presumably have to be imported from that module -- confirm.

# Data import.
# Raw strings are required: in a normal literal "\x" starts a hex escape, so
# 'D:\xxx' is rejected by the Python compiler.
# NOTE(review): both calls use the same placeholder path; presumably each
# channel has its own workbook -- confirm.
feature, label = open_excel(r'D:\xxx')
feature1, label1 = open_excel(r'D:\xxx')

# Split ratios and shuffle switch.
split = 0.1  # fraction of the whole data set used for testing
split_val = 0.2  # fraction of the whole data set used for validation
# 1 = shuffle the data set, 0 = keep the original order.
# NOTE(review): this flag is never passed to split_data_set_TWO below.
ifshuffle = 1

# Split both channels and the labels.
x_train, x_val, x_test, x_train1, x_val1, x_test1, y_train, y_val, y_test = split_data_set_TWO(
    feature, feature1, label, label1, split, split_val
)

# Convert every subset to tensors (the two channels are joined per subset).
x_tr, y_tr = inputtotensor_two(x_train, x_train1, y_train)
x_vl, y_vl = inputtotensor_two(x_val, x_val1, y_val)
x_ts, y_ts = inputtotensor_two(x_test, x_test1, y_test)

train_dataset = torch.utils.data.TensorDataset(x_tr, y_tr)
val_dataset = torch.utils.data.TensorDataset(x_vl, y_vl)
# Despite its name this is a TensorDataset, not a DataLoader.
test_dataloader = torch.utils.data.TensorDataset(x_ts, y_ts)

# Build the data loaders.
data_loder_train = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
data_loder_val = DataLoader(dataset=val_dataset, batch_size=128, shuffle=True)
data_loder_test = DataLoader(dataset=test_dataloader, batch_size=128, shuffle=False)
dataset
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import numpy
import pandas as pd
import random
def open_excel(filename):
    """Read an Excel workbook and separate features from labels.

    The last column of the sheet is returned as the labels; every other
    column is returned as a feature column.

    :param filename: path of the workbook without the ".xlsx" suffix
        (the suffix is appended here)
    :return: (features converted with ``np.float64``, labels)
    """
    sheet = pd.read_excel(f'{filename}.xlsx', engine='openpyxl')
    # After the transpose each entry of ``columns`` is one sheet column.
    columns = sheet.T.to_numpy()
    features = np.float64(columns[0:-1].T)
    labels = columns[-1]
    return features, labels
def random_number(data_size):
    """Return the integers 0 .. data_size - 1 as a list in random order.

    The order is produced by ``random.shuffle``, so it follows the state of
    the standard-library ``random`` module.
    """
    indices = [*range(data_size)]
    random.shuffle(indices)
    return indices
def split_data_set_TWO(data_set, data_set1, target_set, target_set1, rate, rate_val, shuffle=True):
    """Split two aligned channels and their labels into train/val/test subsets.

    Both channels are partitioned with the same index order, so row ``i`` of
    ``data_set`` and row ``i`` of ``data_set1`` always land in the same
    subset. The training subset receives ``floor((1 - rate - rate_val) * n)``
    samples, the validation subset ``floor(rate_val * n)`` samples, and the
    test subset receives every remaining sample.

    :param data_set: samples of the first channel, indexed along axis 0
    :param data_set1: samples of the second channel, same length as
        ``data_set``
    :param target_set: labels, one per sample; these are the labels returned
    :param target_set1: labels of the second channel; kept for interface
        compatibility, it does not contribute to the returned values
    :param rate: fraction of the data used for the test subset
    :param rate_val: fraction of the data used for the validation subset
    :param shuffle: when true (default) the samples are assigned in the
        random order produced by ``random_number``; when false the original
        order is kept
    :return: x_train, x_val, x_test, x_train1, x_val1, x_test1,
        y_train, y_val, y_test
    :raises ValueError: if ``data_set1`` or ``target_set`` differ in length
        from ``data_set``, or if the ratios are negative or sum to more
        than 1
    """
    # Plain Python sequences cannot be indexed with a list of positions,
    # so convert them; arrays and other containers are left untouched.
    if isinstance(data_set, (list, tuple)):
        data_set = np.asarray(data_set)
    if isinstance(data_set1, (list, tuple)):
        data_set1 = np.asarray(data_set1)
    if isinstance(target_set, (list, tuple)):
        target_set = np.asarray(target_set)

    total_size = len(data_set)
    if len(data_set1) != total_size or len(target_set) != total_size:
        raise ValueError(
            "data_set, data_set1 and target_set must have the same length "
            f"(got {total_size}, {len(data_set1)}, {len(target_set)})"
        )
    # Small tolerance so that ratios which sum to 1 up to float error pass.
    if rate < 0 or rate_val < 0 or rate + rate_val > 1 + 1e-9:
        raise ValueError(
            "rate and rate_val must be non-negative and sum to at most 1"
        )

    # Round away binary floating-point noise before truncating; otherwise a
    # product such as 0.29 * 100 == 28.999999999999996 loses one sample.
    train_size = max(0, int(round((1 - (rate + rate_val)) * total_size, 9)))
    val_size = int(round(rate_val * total_size, 9))
    val_end = train_size + val_size

    # Positions of the samples, optionally in random order.
    if shuffle:
        data_index = random_number(total_size)
    else:
        data_index = list(range(total_size))

    train_idx = data_index[:train_size]
    val_idx = data_index[train_size:val_end]
    test_idx = data_index[val_end:]  # everything left over

    # First channel.
    x_train, x_val, x_test = data_set[train_idx], data_set[val_idx], data_set[test_idx]
    # Second channel, selected with the very same positions.
    x_train1, x_val1, x_test1 = data_set1[train_idx], data_set1[val_idx], data_set1[test_idx]
    # Labels.
    y_train, y_val, y_test = target_set[train_idx], target_set[val_idx], target_set[test_idx]
    return x_train, x_val, x_test, x_train1, x_val1, x_test1, y_train, y_val, y_test
def inputtotensor_two(inputtensor, inputtensor1, labeltensor):
    """Convert two sample channels and their labels into PyTorch tensors.

    :param inputtensor: samples of the first channel
    :param inputtensor1: samples of the second channel
    :param labeltensor: labels; passed through ``np.array``, cast to float
        and wrapped in a ``torch.LongTensor``
    :return: (both channels as FloatTensors concatenated along dim 1,
        label tensor)
    """
    first_array = np.array(inputtensor)
    second_array = np.array(inputtensor1)
    channels = [torch.FloatTensor(first_array), torch.FloatTensor(second_array)]
    # Join the channels side by side: dim 0 is kept, dim 1 grows.
    x = torch.cat(channels, dim=1)
    # Labels are cast to float by NumPy before the LongTensor is built.
    label_values = np.array(labeltensor).astype(float)
    labels = torch.LongTensor(label_values)
    return x, labels