1 、高维数组
1.1 回归数据
回归数据主要是波士顿房价的数据,连续对应回归
# Regression data (Boston housing) — continuous targets correspond to regression.
#
# Pipeline:
#   (1) load the Boston housing data as numpy arrays (X: (506, 13), y: (506,), both float64)
#   (2) cast to np.float32 — torch layers default to float32 weights
#   (3) wrap the numpy arrays as torch Tensors
#   (4) pair them in a TensorDataset and feed that to a DataLoader
#   (5) iterate: for step, (b_x, b_y) in enumerate(train_loader) — b_x is a data batch, b_y its targets
import torch
import torch.utils.data as Data
import numpy as np

# FIX: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2
# (ethical concerns with the dataset), so importing it raises ImportError on
# current versions. Fall back to fetching the same data from OpenML.
try:
    from sklearn.datasets import load_boston
    # return_X_y=True returns the (data, target) pair instead of a Bunch object.
    boston_x, boston_y = load_boston(return_X_y=True)
except ImportError:
    from sklearn.datasets import fetch_openml
    # as_frame=False returns numpy arrays; cast defensively since OpenML may
    # deliver mixed dtypes — TODO confirm on the installed sklearn version.
    _boston = fetch_openml(name="boston", version=1, as_frame=False)
    boston_x = _boston.data.astype(np.float64)
    boston_y = _boston.target.astype(np.float64)

# 506 samples, 13 features each (town crime rate, NOx concentration, average
# rooms per dwelling, weighted distance to employment centres, ...); the
# target is the median house price — a continuous value, hence regression.
# boston_x.shape == (506, 13), boston_y.shape == (506,)

# float64 -> float32 before building tensors (torch models train in float32).
train_xt = torch.from_numpy(boston_x.astype(np.float32))
train_yt = torch.from_numpy(boston_y.astype(np.float32))
# train_xt.shape == torch.Size([506, 13]), train_yt.shape == torch.Size([506])

# TensorDataset pairs features and targets so the loader yields (x, y) together.
train_data = Data.TensorDataset(train_xt, train_yt)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=64,   # each iteration yields up to 64 of the 506 samples
    shuffle=True,    # reshuffle the dataset every epoch
    num_workers=0,   # 0 = load in the main process, no worker subprocesses
)

# Sanity-check the dimensions of one batch.
# `step` is simply the 0-based batch index produced by enumerate().
for step, (b_x, b_y) in enumerate(train_loader):
    if step > 0:  # only inspect the first batch
        break
print('b_x.shape', b_x.shape)   # torch.Size([64, 13]) — batch_size x n_features
print('b_y.shape', b_y.shape)   # torch.Size([64])
print('b_x.dtype', b_x.dtype)   # torch.float32
print('b_y.dtype', b_y.dtype)   # torch.float32
1.2 分类数据
分类数据主要是鸢尾花(iris)的数据,离散对应分类
# Classification data (iris) — discrete targets correspond to classification.
#
# Pipeline:
#   (1) load the data: load_iris(return_X_y=True)
#   (2) cast features to float32 and labels to int64
#   (3) wrap the arrays as tensors in a TensorDataset
#   (4) feed the dataset to a DataLoader (batch_size, shuffle, ...)
#   (5) iterate: for step, (i_x, i_y) in enumerate(train_loader)
import torch
import torch.utils.data as Data
from sklearn.datasets import load_iris
import numpy as np

iris_x, iris_y = load_iris(return_X_y=True)
# iris_x.shape == (150, 4); iris_y.shape == (150,) with class labels 0/1/2

train_xt = torch.from_numpy(iris_x.astype(np.float32))
# FIX: classification labels must be integer tensors (torch.int64) — losses
# such as nn.CrossEntropyLoss require Long targets, not float32.
train_yt = torch.from_numpy(iris_y.astype(np.int64))
train_data = Data.TensorDataset(train_xt, train_yt)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=10,
    shuffle=True,
    num_workers=0,
)
# Inspect only the first batch to verify the shapes.
for step, (i_x, i_y) in enumerate(train_loader):
    if step > 0:
        break
# print(i_x.shape)  # torch.Size([10, 4])
# print(i_y.shape)  # torch.Size([10])
2、图像数据
图像数据主要是利用torchvision中的datasets模块实现
2.1 从torchvision中的datasets模块导入数据并预处理
# Image data via torchvision.datasets (FashionMNIST).
#
# Pipeline:
#   (1) download/prepare the data
#   (2) define a DataLoader over the training set
#   (3) pull the raw test tensors (test_data.data) and labels (test_data.targets)
#       straight off the dataset object and normalize them by hand
import torch
import torch.utils.data as Data
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms

# FIX: use a raw string for the Windows path — "\M" and "\p" are invalid
# escape sequences (SyntaxWarning today, a SyntaxError in future Python).
# The runtime value is unchanged.
DATA_ROOT = r"E:\Master_Cource\高级编程\propcess"

# Training set
train_data = FashionMNIST(
    root=DATA_ROOT,
    train=True,
    transform=transforms.ToTensor(),  # PIL image -> float32 tensor in [0, 1]
    download=True,                    # download into DATA_ROOT if not already present
)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    # FIX: worker subprocesses spawned at module import time crash on
    # Windows/macOS without an `if __name__ == "__main__":` guard; use 0
    # (main-process loading), consistent with the other loaders in this file.
    num_workers=0,
)
print("train_loader的batchsize数量:", len(train_loader))  # number of batches per epoch

# Test set — no transform here because the pixels are normalized manually below.
test_data = FashionMNIST(
    root=DATA_ROOT,
    train=False,
    download=False,  # the download above fetched both the train and test splits
)
# Raw uint8 pixels -> float32 in [0, 1]; shape torch.Size([10000, 28, 28])
test_data_x = test_data.data.type(torch.FloatTensor) / 255.0
# Insert the channel dimension at dim=1: (10000, 28, 28) -> (10000, 1, 28, 28)
test_data_x = torch.unsqueeze(test_data_x, dim=1)
test_data_y = test_data.targets  # torch.Size([10000]) — one label per test image
2.2 从本地文件夹导入数据并预处理
# Load image data from a local folder with torchvision's ImageFolder.
#
# Steps: build the preprocessing pipeline, point ImageFolder at the data
# directory, wrap the dataset in a DataLoader, then inspect one batch.
import torch
import torch.utils.data as Data
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Preprocessing: random crop/flip for augmentation, then tensor conversion
# and normalization with the ImageNet channel statistics.
train_data_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),  # random crop, resized to 224x224
    transforms.RandomHorizontalFlip(),  # 50% chance of a left-right flip
    transforms.ToTensor(),              # PIL -> float tensor in [0, 1]
    transforms.Normalize(
        [0.485, 0.456, 0.406],          # per-channel mean
        [0.229, 0.224, 0.225],          # per-channel std
    ),
])

# Placeholder: set to a folder containing one subdirectory per class.
train_data_dir = ""
train_data = ImageFolder(train_data_dir, transform=train_data_transforms)
train_data_loader = Data.DataLoader(
    train_data,
    batch_size=4,
    shuffle=True,
    num_workers=1,
)
print('train_data.targets:', train_data.targets)

# Grab just the first batch to sanity-check shapes and the value range.
for step, (b_x, b_y) in enumerate(train_data_loader):
    if step > 0:
        break
print(b_x.shape)
print(b_y.shape)
print('图像的取值范围:', b_x.min(), "-", b_x.max())