【笔记】PyTorch 中的广播:tensor 对 tensor、array 对 array、list 对 array,这三种不会报错,可以正常进行广播运算

本文介绍了如何使用PyTorch对图像数据进行预处理,包括读取文件、转换为Tensor、计算并应用均值和标准差。通过mydataset类演示了如何从目录中加载图像,调整像素值,归一化,并获取每批次数据的统计特性。
摘要由CSDN通过智能技术生成

tensor 对 tensor:OK

# std,mean求取程序
from torch.utils.data import Dataset,DataLoader
import torch
import os
from PIL import Image
import numpy as np


# target1=torch.tensor([])
# target2=torch.tensor([])
# target3=torch.tensor([])
#
# mean=torch.tensor([])
# i=0


class mydataset(Dataset):
    """Folder-of-images dataset that normalises with tensor-vs-tensor broadcasting.

    Every file name in ``path`` is expected to start with an integer class
    label followed by a dot (e.g. ``0.1.jpeg``). ``self.mean`` and ``self.std``
    hold per-channel statistics as ``torch.Tensor`` objects, so normalisation
    broadcasts an (H, W, 3) tensor against a (3,) tensor.
    """

    # Set to True once the first sample has printed the demonstration output,
    # so the output appears once instead of once per image.
    _demo_printed = False

    def __init__(self, path):
        """Record ``path`` and list the files found directly inside it."""
        self.path = path
        # os.listdir returns entries in arbitrary order; sort for a stable index.
        self.dataset = sorted(os.listdir(self.path))
        # float64 matches the dtype of the scaled pixel array (uint8 / 255).
        self.mean = torch.tensor([0.4876, 0.4542, 0.4166], dtype=torch.float64)
        self.std = torch.tensor([0.2624, 0.2558, 0.2580], dtype=torch.float64)

    def __len__(self):
        """Return the number of files listed in the folder."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one image and return ``(data, target)``.

        Returns:
            data: float32 tensor laid out as (C, H, W), normalised per channel.
            target: scalar integer tensor parsed from the text before the
                first ``.`` in the file name.

        Raises:
            ValueError: if the file name does not start with an integer label.
        """
        name = self.dataset[index]
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(os.path.join(self.path, name)) as img:
            # Force three channels so the (3,) statistics always broadcast;
            # a PIL image must become an array before any arithmetic.
            pixels = np.array(img.convert("RGB")) / 255
        data = self._normalize(pixels)
        target = torch.tensor(int(name.split(".")[0]))
        return data, target

    def _normalize(self, pixels):
        """Normalise an (H, W, 3) array in [0, 1] and return a (3, H, W) float32 tensor."""
        data = torch.tensor(pixels)
        announce = not self._demo_printed
        if announce:
            print(data.shape, type(data), type(self.mean), self.mean.shape)
        # tensor (H, W, 3) against tensor (3,): broadcasts over the last axis.
        data = (data - self.mean) / self.std
        if announce:
            print("广播 OK")
            self._demo_printed = True
        # .to() avoids the copy-construct warning torch.tensor() emits for a
        # tensor argument; permute(2, 0, 1) maps H W C -> C H W.
        return data.to(torch.float32).permute(2, 0, 1)


# `torch.tensor` is a function, not a submodule: `import torch.tensor as tensor`
# raises ModuleNotFoundError on recent PyTorch releases. Importing the function
# keeps the `tensor` name available.
from torch import tensor  # noqa: F401
if __name__ == "__main__":
    a = mydataset(r"./cat_dog/img")

    # One batch of up to 12000 samples; if the folder holds more images, the
    # statistics below cover only that first batch.
    data_loader = DataLoader(dataset=a, batch_size=12000, shuffle=False)
    data = next(iter(data_loader))[0]
    # Reduce over the batch axis and both spatial axes: one value per channel.
    mean = torch.mean(data, dim=(0, 2, 3))
    std = torch.std(data, dim=(0, 2, 3))
    print(mean, std)
    # Note: calling a.__getitem__() without an index fails with
    # TypeError: __getitem__() missing 1 required positional argument: 'index'

    # Per-batch alternative:
    # for i, j in data_loader:
    #     mean = torch.mean(i, dim=(0, 2, 3))
    #     std = torch.std(i, dim=(0, 2, 3))
    #     print(mean, std)




torch.Size([100, 100, 3]) <class 'torch.Tensor'> <class 'torch.Tensor'> torch.Size([3])
广播 OK

array 对 tensor:NO

array 对 array:OK


# std,mean求取程序
from torch.utils.data import Dataset,DataLoader
import torch
import os
from PIL import Image
import numpy as np


# target1=torch.tensor([])
# target2=torch.tensor([])
# target3=torch.tensor([])
#
# mean=torch.tensor([])
# i=0


class mydataset(Dataset):
    """Folder-of-images dataset that normalises with array-vs-array broadcasting.

    Every file name in ``path`` is expected to start with an integer class
    label followed by a dot (e.g. ``0.1.jpeg``). ``self.mean`` and ``self.std``
    hold per-channel statistics as ``numpy.ndarray`` objects, so normalisation
    broadcasts an (H, W, 3) array against a (3,) array before anything is
    turned into a tensor.
    """

    # Set to True once the first sample has printed the demonstration output,
    # so the output appears once instead of once per image.
    _demo_printed = False

    def __init__(self, path):
        """Record ``path`` and list the files found directly inside it."""
        self.path = path
        # os.listdir returns entries in arbitrary order; sort for a stable index.
        self.dataset = sorted(os.listdir(self.path))
        self.mean = np.array([0.4876, 0.4542, 0.4166])
        self.std = np.array([0.2624, 0.2558, 0.2580])

    def __len__(self):
        """Return the number of files listed in the folder."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one image and return ``(data, target)``.

        Returns:
            data: float32 tensor laid out as (C, H, W), normalised per channel.
            target: scalar integer tensor parsed from the text before the
                first ``.`` in the file name.

        Raises:
            ValueError: if the file name does not start with an integer label.
        """
        name = self.dataset[index]
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(os.path.join(self.path, name)) as img:
            # Force three channels so the (3,) statistics always broadcast;
            # a PIL image must become an array before any arithmetic.
            pixels = np.array(img.convert("RGB")) / 255
        data = self._normalize(pixels)
        target = torch.tensor(int(name.split(".")[0]))
        return data, target

    def _normalize(self, pixels):
        """Normalise an (H, W, 3) array in [0, 1] and return a (3, H, W) float32 tensor."""
        announce = not self._demo_printed
        if announce:
            print(pixels.shape, type(pixels), type(self.mean), self.mean.shape)
        # ndarray (H, W, 3) against ndarray (3,): broadcasts over the last axis.
        scaled = (pixels - self.mean) / self.std
        if announce:
            print("广播 OK")
            self._demo_printed = True
        # permute(2, 0, 1) maps H W C -> C H W.
        return torch.tensor(scaled, dtype=torch.float32).permute(2, 0, 1)


# `torch.tensor` is a function, not a submodule: `import torch.tensor as tensor`
# raises ModuleNotFoundError on recent PyTorch releases. Importing the function
# keeps the `tensor` name available.
from torch import tensor  # noqa: F401
if __name__ == "__main__":
    a = mydataset(r"./cat_dog/img")

    # One batch of up to 12000 samples; if the folder holds more images, the
    # statistics below cover only that first batch.
    data_loader = DataLoader(dataset=a, batch_size=12000, shuffle=False)
    data = next(iter(data_loader))[0]
    # Reduce over the batch axis and both spatial axes: one value per channel.
    mean = torch.mean(data, dim=(0, 2, 3))
    std = torch.std(data, dim=(0, 2, 3))
    print(mean, std)
    # Note: calling a.__getitem__() without an index fails with
    # TypeError: __getitem__() missing 1 required positional argument: 'index'

    # Per-batch alternative:
    # for i, j in data_loader:
    #     mean = torch.mean(i, dim=(0, 2, 3))
    #     std = torch.std(i, dim=(0, 2, 3))
    #     print(mean, std)







(100, 100, 3) <class 'numpy.ndarray'> <class 'numpy.ndarray'> (3,)
广播 OK

list 对 array:OK

# std,mean求取程序
from torch.utils.data import Dataset,DataLoader
import torch
import os
from PIL import Image
import numpy as np


# target1=torch.tensor([])
# target2=torch.tensor([])
# target3=torch.tensor([])
#
# mean=torch.tensor([])
# i=0


class mydataset(Dataset):
    """Folder-of-images dataset that normalises with list-vs-array broadcasting.

    Every file name in ``path`` is expected to start with an integer class
    label followed by a dot (e.g. ``0.1.jpeg``). ``self.mean`` and ``self.std``
    are deliberately left as plain Python lists: NumPy accepts a list operand
    in arithmetic with an array, so an (H, W, 3) array still broadcasts
    against the three-element lists.
    """

    # Set to True once the first sample has printed the demonstration output,
    # so the output appears once instead of once per image.
    _demo_printed = False

    def __init__(self, path):
        """Record ``path`` and list the files found directly inside it."""
        self.path = path
        # os.listdir returns entries in arbitrary order; sort for a stable index.
        self.dataset = sorted(os.listdir(self.path))
        self.mean = [0.4876, 0.4542, 0.4166]
        self.std = [0.2624, 0.2558, 0.2580]

    def __len__(self):
        """Return the number of files listed in the folder."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one image and return ``(data, target)``.

        Returns:
            data: float32 tensor laid out as (C, H, W), normalised per channel.
            target: scalar integer tensor parsed from the text before the
                first ``.`` in the file name.

        Raises:
            ValueError: if the file name does not start with an integer label.
        """
        name = self.dataset[index]
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(os.path.join(self.path, name)) as img:
            # Force three channels so the three-element statistics always
            # broadcast; a PIL image must become an array before arithmetic.
            pixels = np.array(img.convert("RGB")) / 255
        data = self._normalize(pixels)
        target = torch.tensor(int(name.split(".")[0]))
        return data, target

    def _normalize(self, pixels):
        """Normalise an (H, W, 3) array in [0, 1] and return a (3, H, W) float32 tensor."""
        # ndarray (H, W, 3) against a 3-element list: the list operand is
        # handled by NumPy and broadcasts over the last axis.
        scaled = (pixels - self.mean) / self.std
        if not self._demo_printed:
            print("广播 OK")
            self._demo_printed = True
        # permute(2, 0, 1) maps H W C -> C H W.
        return torch.tensor(scaled, dtype=torch.float32).permute(2, 0, 1)


# `torch.tensor` is a function, not a submodule: `import torch.tensor as tensor`
# raises ModuleNotFoundError on recent PyTorch releases. Importing the function
# keeps the `tensor` name available.
from torch import tensor  # noqa: F401
if __name__ == "__main__":
    a = mydataset(r"./cat_dog/img")

    # One batch of up to 12000 samples; if the folder holds more images, the
    # statistics below cover only that first batch.
    data_loader = DataLoader(dataset=a, batch_size=12000, shuffle=False)
    data = next(iter(data_loader))[0]
    # Reduce over the batch axis and both spatial axes: one value per channel.
    mean = torch.mean(data, dim=(0, 2, 3))
    std = torch.std(data, dim=(0, 2, 3))
    print(mean, std)
    # Note: calling a.__getitem__() without an index fails with
    # TypeError: __getitem__() missing 1 required positional argument: 'index'

    # Per-batch alternative:
    # for i, j in data_loader:
    #     mean = torch.mean(i, dim=(0, 2, 3))
    #     std = torch.std(i, dim=(0, 2, 3))
    #     print(mean, std)







广播 OK

但是加上第 339 行(原编辑器中的行号,应指启用 `data = torch.tensor(data)`,即先把 data 转成 tensor 再与 list 运算)后会报错:

tensor 对 list:NO

不可以广播:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

程序猿的探索之路

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值