PyTorch: Creating Your Own Dataset and Loading Multi-Frame Image Data into an Input Tensor

#DataLoader.py

import torch
from torch.utils.data import Dataset,DataLoader
import numpy as np
import os
import torch.utils.data as data
from PIL import Image
import cv2
import os.path
from numpy.random import randint
import torchvision
import glob
import random
import time
from transforms import *
# Directory containing the data list files:
'''
 video_datasets -  'category.txt'
                -  'train_videofolder.txt'
                -  'val_videofolder.txt'
'''
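# Each line of train_videofolder.txt / val_videofolder.txt is assumed to follow
# "<video_subfolder> <num_frames> <label>" (space-separated), matching how
# VideoRecord indexes the row below. Hypothetical example lines:
#   v_ApplyEyeMakeup_g01_c01 164 0
#   v_Archery_g05_c03 120 2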
ROOT_DATASET = 'video_datasets'
def return_ucf101(modality):
    filename_categories = 'category.txt'
    if modality == 'RGB':
        # directory containing the extracted frames
        root_data = '/home/enbo/share/UCF101'
        #root_data = '/mnt/localssd1/bzhou/something/20bn-something-something-v1'
        filename_imglist_train = 'train_videofolder.txt'
        filename_imglist_val = 'val_videofolder.txt'
        prefix = '{:04d}.jpg'
    elif modality == 'Flow':
        root_data = '/data/vision/oliva/scratch/bzhou/video/something-something/flow'
        #root_data = '/mnt/localssd1/bzhou/something/flow'
        filename_imglist_train = 'something/train_videofolder.txt'
        filename_imglist_val = 'something/val_videofolder.txt'
        prefix = '{:05d}.jpg'
    else:
        # os.exit() does not exist; raise instead, consistent with return_dataset_list
        raise ValueError('no such modality: ' + modality)
    return filename_categories, filename_imglist_train, filename_imglist_val, root_data, prefix

def return_dataset_list(dataset,modality):
    dataset_dict = {'UCF101':return_ucf101}
    if dataset in dataset_dict:
        file_categories,file_imglist_train,file_imglist_val,root_data,prefix = dataset_dict[dataset](modality)
    else:
        raise ValueError('Unknown dataset ' + dataset)
    file_imglist_train = os.path.join(ROOT_DATASET, file_imglist_train)
    file_imglist_val = os.path.join(ROOT_DATASET, file_imglist_val)
    file_categories = os.path.join(ROOT_DATASET, file_categories)
    with open(file_categories) as f:
        lines = f.readlines()
    categories = [item.rstrip() for item in lines]
    return categories, file_imglist_train, file_imglist_val, root_data, prefix

class VideoRecord(object):
    def __init__(self, row):
        self._data = row

    @property
    def path(self):
        return self._data[0]

    @property
    def num_frames(self):
        return int(self._data[1])

    @property
    def label(self):
        return int(self._data[2])
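    # Usage sketch (hypothetical values): VideoRecord(['v_Archery_g05_c03', '120', '2'])
    # exposes .path == 'v_Archery_g05_c03', .num_frames == 120 and .label == 2.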


class MydataSet(data.Dataset):

    def __init__(self, root_path, list_file, num_segments=3,
                 new_length=1, modality='RGB', image_tmpl='img_{:05d}.jpg', transform=None,
                 random_shift=True, test_mode=False, remove_missing=False, dense_sample=False):
        self.root_path = root_path
        self.list_file = list_file
        self.num_segments = num_segments
        self.new_length = new_length
        self.modality = modality
        self.image_tmpl = image_tmpl
        self.transform = transform
        self.random_shift = random_shift
        self.test_mode = test_mode
        self.remove_missing = remove_missing
        self.dense_sample = dense_sample  # using dense sample as I3D

        self._parse_list()
        self.total_list = self.video_list

    def _load_image(self, directory, idx):
        if self.modality == 'RGB' or self.modality == 'RGBDiff':
           # print(self.root_path)
           # print(self.image_tmpl.format(idx))
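            # cv2.imread returns BGR; the [:, :, ::-1] slice below reverses the
            # channel axis to get RGB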
            try:
                return [
                    cv2.imread(os.path.join(self.root_path, directory, directory + '-' + self.image_tmpl.format(idx)))[:, :,
                    ::-1]]
                # return [Image.open(os.path.join(self.root_path, directory, directory + '-' + self.image_tmpl.format(idx))).convert('RGB')]
            except Exception:
                print('error loading image:',
                      os.path.join(self.root_path, directory, directory + '-' + self.image_tmpl.format(idx)))
                raise
                # return [Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(1))).convert('RGB')]
        elif self.modality == 'Flow':
            try:
                idx_skip = 1 + (idx - 1) * 5
                flow = Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(idx_skip))).convert('RGB')
            except Exception:
                print('error loading flow file:', os.path.join(self.root_path, directory, self.image_tmpl.format(idx_skip)))
                flow = Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(1))).convert('RGB')
            # the input flow file is RGB image with (flow_x, flow_y, blank) for each channel
            flow_x, flow_y, _ = flow.split()
            x_img = flow_x.convert('L')
            y_img = flow_y.convert('L')

            return [x_img, y_img]
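        # Note: the RGB branch returns numpy arrays (H x W x 3) while the Flow
        # branch returns PIL 'L' images; the transform pipeline used below
        # assumes the numpy (RGB) path.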

    def _parse_list(self):
        # filter out subdirectories with fewer than 8 frames
        tmp = [x.strip().split(' ') for x in open(self.list_file)]
        tmp = [item for item in tmp if int(item[1]) >= 8]
        self.video_list = [VideoRecord(item) for item in tmp]
        # print(self.video_list[0])  # each image directory becomes a VideoRecord object
        print('video number: %d' % len(self.video_list))
    # __getitem__ below is the built-in Dataset hook through which samples are read

    def _get_val_indices(self, record):
        if record.num_frames > self.num_segments + self.new_length - 1:
            tick = (record.num_frames - self.new_length + 1) / float(self.num_segments)
            offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])
        else:
            offsets = np.zeros((self.num_segments,))

        return offsets + 1
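        # Worked example (hypothetical): num_frames=100, num_segments=8, new_length=1
        # -> tick = 12.5 and offsets + 1 = [7, 19, 32, 44, 57, 69, 82, 94]
        #    (1-based indices at the center of each segment)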

    def _get_test_indices(self, record):
        tick = (record.num_frames - self.new_length + 1) / float(self.num_segments)

        offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])

        # the original version forgot to return, so get() would crash in test mode
        return offsets + 1

    def __getitem__(self,index):

        record = self.video_list[index]
        while not os.path.exists(os.path.join(self.root_path, record.path, record.path + '-' + self.image_tmpl.format(1))):
           # print('[DEBUG:getitem]')
            print(os.path.join(self.root_path, record.path, record.path + '-' + self.image_tmpl.format(1)))
            index = np.random.randint(len(self.video_list))
            record = self.video_list[index]
        if not self.test_mode:
           # print('[DEBUG:getitem]')
           # print(record.path)
            segment_indices = self._sample_indices(record) if self.random_shift else self._get_val_indices(record)
            # print(segment_indices)
        else:
            segment_indices = self._get_test_indices(record)

        return self.get(record, segment_indices)

    def _sample_indices(self, record):
        #print(record.path)
        #print(record.num_frames)
        average_duration = (record.num_frames - self.new_length + 1) // self.num_segments
        if average_duration > 0:
            offsets = np.multiply(list(range(self.num_segments)), average_duration) + randint(average_duration,
                                                                                              size=self.num_segments)
        elif record.num_frames > self.num_segments:
            offsets = np.sort(randint(record.num_frames - self.new_length + 1, size=self.num_segments))
        else:
            offsets = np.zeros((self.num_segments,))
        return offsets + 1
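        # Worked example (hypothetical): num_frames=100, num_segments=8, new_length=1
        # -> average_duration = 12, so the result is [0, 12, ..., 84] + randint(12, size=8) + 1:
        #    one randomly jittered 1-based index per segment (TSN-style sampling)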

    def get(self, record, indices):
        images = list()
        for seg_ind in indices:
            p = int(seg_ind)
            for i in range(self.new_length):

                seg_imgs = self._load_image(record.path, p)
                # append the loaded frames to the list
                images.extend(seg_imgs)
                if p < record.num_frames:
                    p += 1

        process_data = self.transform(images)
        return process_data, record.label
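    # With the transforms configured below, process_data ends up as a float tensor
    # of shape (C, num_segments * new_length, H, W), e.g. (3, 8, 224, 224):
    # Stack() builds a (T, H, W, C) array and ToTorchFormatTensor permutes it.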

    def __len__(self):
        return len(self.video_list)


# categories, train/val list files, image root path, filename prefix
categories, train_list, val_list, root_path, prefix = return_dataset_list('UCF101', 'RGB')

data_length = 1
num_segments = 8
input_mean = [0.485, 0.456, 0.406]
input_std = [0.229, 0.224, 0.225]
normalize = GroupNormalize(input_mean, input_std)
arch = 'RESNET'
# quick test
# a = MydataSet(root_path, train_list, num_segments,
#           new_length=data_length,
#           modality="RGB",
#           image_tmpl=prefix,
#           transform=torchvision.transforms.Compose([
#               GroupMultiScaleCrop(224, [1, .875, .75, .66]),
#               GroupRandomHorizontalFlip(is_flow=False),
#               Stack(roll=(arch in ['BNInception', 'InceptionV3'])),
#               ToTorchFormatTensor(div=(arch not in ['BNInception', 'InceptionV3'])),
#               normalize,
#           ]))
# img,label = a[0]
# print(img.size)
# transform usage: tran = torchvision.transforms.Compose([...]); then call tran(img)
batch_size = 1
workers = 2
train_loader = torch.utils.data.DataLoader(
               MydataSet(root_path, train_list, num_segments,
               new_length = data_length,
               modality="RGB",
               image_tmpl=prefix,
               transform=torchvision.transforms.Compose([
                  GroupMultiScaleCrop(224, [1, .875, .75, .66]),
                  GroupRandomHorizontalFlip(is_flow=False),
                  Stack(roll=(arch in ['BNInception', 'InceptionV3'])),
                  ToTorchFormatTensor(div=(arch not in ['BNInception', 'InceptionV3'])),
                  normalize,
              ])),
    batch_size=batch_size, shuffle=True,
    num_workers=workers, pin_memory=True,
    drop_last=True)  # drop the last incomplete batch so batch size stays divisible by n_GPU

for i, (input, target) in enumerate(train_loader):
    print(input.size())  # e.g. torch.Size([1, 3, 8, 224, 224]) with batch_size=1
    break

'''
  Summary -- Dataset loading steps:
    1. __init__ parses train_videofolder.txt: every line becomes a VideoRecord
       object (subfolder path, frame count, label), and all records are kept in
       a list.
    2. Each call to __getitem__ returns the data of num_segments frames; the
       transform pipeline stacks them into an [8, 224, 224, 3] array (shapes are
       configurable) and ToTorchFormatTensor permutes it to a [3, 8, 224, 224]
       tensor.
'''
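
To make the shapes concrete, below is a minimal sketch of the pipeline the summary describes, using random arrays in place of real frames (all values are hypothetical; it assumes the behavior of the Stack, ToTorchFormatTensor, and GroupNormalize classes defined in transforms.py below):

#shape_check.py
import numpy as np
import torch

num_segments, H, W = 8, 224, 224
# stand-in for the num_segments RGB frames that _load_image would return
frames = [np.random.randint(0, 256, (H, W, 3), dtype=np.uint8) for _ in range(num_segments)]

stacked = np.array(frames)  # Stack(roll=False): shape (8, 224, 224, 3)
# ToTorchFormatTensor: permute to (C, T, H, W) and scale to [0, 1]
tensor = torch.from_numpy(stacked).permute(3, 0, 1, 2).contiguous().float().div(255)
# GroupNormalize: normalize each of the 3 channel slices in place
for t, m, s in zip(tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]):
    t.sub_(m).div_(s)
print(tensor.shape)  # torch.Size([3, 8, 224, 224])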
#transforms.py

import torchvision
import random
from PIL import Image, ImageOps
import numpy as np
import numbers
import math
import torch
import cv2


class GroupRandomCrop(object):
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img_group):

        w, h = img_group[0].size
        th, tw = self.size

        out_images = list()

        x1 = random.randint(0, w - tw)
        y1 = random.randint(0, h - th)

        for img in img_group:
            assert(img.size[0] == w and img.size[1] == h)
            if w == tw and h == th:
                out_images.append(img)
            else:
                out_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))

        return out_images


class GroupCenterCrop(object):
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        # self.worker = torchvision.transforms.CenterCrop(size)

    def __call__(self, img_group):
        # return [self.worker(img) for img in img_group]
        return [cv2.resize(img, self.size) for img in img_group]
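    # Note: despite the name, this version resizes with cv2.resize instead of
    # center-cropping (the PIL CenterCrop worker is commented out), so it
    # expects numpy images.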

# Random horizontal flip
class GroupRandomHorizontalFlip(object):
    """Randomly horizontally flips the given PIL.Image with a probability of 0.5
    """
    def __init__(self, is_flow=False):
        self.is_flow = is_flow

    def __call__(self, img_group, is_flow=False):
        v = random.random()
        if v < 0.5:
            # ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group]
            ret = [cv2.flip(img, 1) for img in img_group]
            if self.is_flow:
                for i in range(0, len(ret), 2):
                    ret[i] = ImageOps.invert(ret[i])  # invert flow pixel values when flipping
            return ret
        else:
            return img_group


class GroupNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        rep_mean = self.mean * (tensor.size()[0]//len(self.mean))
        rep_std = self.std * (tensor.size()[0]//len(self.std))

        # TODO: make efficient
        for t, m, s in zip(tensor, rep_mean, rep_std):
            t.sub_(m).div_(s)

        return tensor
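    # Worked example (hypothetical): for a (3, 8, 224, 224) input tensor,
    # tensor.size()[0] // len(self.mean) == 1, so rep_mean == self.mean and the
    # zip normalizes each of the 3 channel slices in place.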


class GroupScale(object):
    """ Rescales the input PIL.Image to the given 'size'.
    'size' will be the size of the smaller edge.
    For example, if height > width, then image will be
    rescaled to (size * height / width, size)
    size: size of the smaller edge
    interpolation: Default: PIL.Image.BILINEAR
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        self.worker = torchvision.transforms.Resize(size, interpolation)

    def __call__(self, img_group):
        return [self.worker(img) for img in img_group]


class GroupOverSample(object):
    def __init__(self, crop_size, scale_size=None):
        self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size)

        if scale_size is not None:
            self.scale_worker = GroupScale(scale_size)
        else:
            self.scale_worker = None

    def __call__(self, img_group):

        if self.scale_worker is not None:
            img_group = self.scale_worker(img_group)

        image_w, image_h = img_group[0].size
        crop_w, crop_h = self.crop_size

        offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h)
        oversample_group = list()
        for o_w, o_h in offsets:
            normal_group = list()
            flip_group = list()
            for i, img in enumerate(img_group):
                crop = img.crop((o_w, o_h, o_w + crop_w, o_h + crop_h))
                normal_group.append(crop)
                flip_crop = crop.copy().transpose(Image.FLIP_LEFT_RIGHT)

                if img.mode == 'L' and i % 2 == 0:
                    flip_group.append(ImageOps.invert(flip_crop))
                else:
                    flip_group.append(flip_crop)

            oversample_group.extend(normal_group)
            oversample_group.extend(flip_group)
        return oversample_group

# Multi-scale cropping: sample a crop size from several scales, then resize to input_size
class GroupMultiScaleCrop(object):

    def __init__(self, input_size, scales=None, max_distort=1, fix_crop=True, more_fix_crop=True):
        self.scales = scales if scales is not None else [1, .875, .75, .66]
        self.max_distort = max_distort
        self.fix_crop = fix_crop
        self.more_fix_crop = more_fix_crop
        self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size]
        self.interpolation = cv2.INTER_LINEAR

    def __call__(self, img_group):

        #im_size = img_group[0].size
        im_size = img_group[0].shape[:2]
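        # shape[:2] of a numpy image is (H, W), so the "w"/"h" names inside
        # _sample_crop_size are swapped relative to PIL's (W, H) convention;
        # the slicing below uses the same swap, so the crop stays consistent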


        crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size)
       # print(crop_w, crop_h, offset_w, offset_h)
        # crop_img_group = [img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h)) for img in img_group]
        crop_img_group = [img[offset_w: offset_w + crop_w, offset_h: offset_h + crop_h] for img in img_group]
        #ret_img_group = [img.resize((self.input_size[0], self.input_size[1]), self.interpolation) for img in crop_img_group]
        ret_img_group = [cv2.resize(img, (self.input_size[0], self.input_size[1]), interpolation=self.interpolation) for img in crop_img_group]
        return ret_img_group

    def _sample_crop_size(self, im_size):
        image_w, image_h = im_size[0], im_size[1]

        # find a crop size
        base_size = min(image_w, image_h)
        crop_sizes = [int(base_size * x) for x in self.scales]
        crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes]
        crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes]

        pairs = []
        for i, h in enumerate(crop_h):
            for j, w in enumerate(crop_w):
                if abs(i - j) <= self.max_distort:
                    pairs.append((w, h))

        crop_pair = random.choice(pairs)
        if not self.fix_crop:
            w_offset = random.randint(0, image_w - crop_pair[0])
            h_offset = random.randint(0, image_h - crop_pair[1])
        else:
            w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1])

        return crop_pair[0], crop_pair[1], w_offset, h_offset

    def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h):
        offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h)
        return random.choice(offsets)

    @staticmethod
    def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h):
        w_step = (image_w - crop_w) // 4
        h_step = (image_h - crop_h) // 4

        ret = list()
        ret.append((0, 0))  # upper left
        ret.append((4 * w_step, 0))  # upper right
        ret.append((0, 4 * h_step))  # lower left
        ret.append((4 * w_step, 4 * h_step))  # lower right
        ret.append((2 * w_step, 2 * h_step))  # center

        if more_fix_crop:
            ret.append((0, 2 * h_step))  # center left
            ret.append((4 * w_step, 2 * h_step))  # center right
            ret.append((2 * w_step, 4 * h_step))  # lower center
            ret.append((2 * w_step, 0 * h_step))  # upper center

            ret.append((1 * w_step, 1 * h_step))  # upper left quarter
            ret.append((3 * w_step, 1 * h_step))  # upper right quarter
            ret.append((1 * w_step, 3 * h_step))  # lower left quarter
            ret.append((3 * w_step, 3 * h_step))  # lower right quarter

        return ret
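    # Worked example (hypothetical): image_w=340, image_h=256, crop 224x224 ->
    # w_step = (340 - 224) // 4 = 29, h_step = (256 - 224) // 4 = 8; the five base
    # offsets are (0,0), (116,0), (0,32), (116,32), (58,16), with 8 more added
    # when more_fix_crop is True.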


class GroupRandomSizedCrop(object):
    """Random crop the given PIL.Image to a random size of (0.08 to 1.0) of the original size
    and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio.
    This is popularly used to train the Inception networks
    size: size of the smaller edge
    interpolation: Default: PIL.Image.BILINEAR
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img_group):
        for attempt in range(10):
            area = img_group[0].size[0] * img_group[0].size[1]
            target_area = random.uniform(0.08, 1.0) * area
            aspect_ratio = random.uniform(3. / 4, 4. / 3)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            if w <= img_group[0].size[0] and h <= img_group[0].size[1]:
                x1 = random.randint(0, img_group[0].size[0] - w)
                y1 = random.randint(0, img_group[0].size[1] - h)
                found = True
                break
        else:
            found = False
            x1 = 0
            y1 = 0

        if found:
            out_group = list()
            for img in img_group:
                img = img.crop((x1, y1, x1 + w, y1 + h))
                assert(img.size == (w, h))
                out_group.append(img.resize((self.size, self.size), self.interpolation))
            return out_group
        else:
            # Fallback
            scale = GroupScale(self.size, interpolation=self.interpolation)
            crop = GroupRandomCrop(self.size)
            return crop(scale(img_group))


class Stack(object):

    def __init__(self, roll=False):
        self.roll = roll

    def __call__(self, img_group):
        if self.roll:
            #print(self.roll)
            return np.concatenate([np.array(x)[:, :, ::-1] for x in img_group], axis=2)
        else:
            # return np.concatenate(img_group, axis=2)
            # return num_segments * H * W * C
            return np.array(img_group)
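            # e.g. (hypothetical) 8 frames of 224x224x3 -> ndarray of shape (8, 224, 224, 3)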


class ToTorchFormatTensor(object):
    """ Converts a PIL.Image (RGB) in the range [0, 255] to a torch.FloatTensor of
    shape (C x H x W), or a numpy.ndarray (T x H x W x C) to one of shape
    (C x T x H x W), scaled to [0.0, 1.0] when div=True """
    def __init__(self, div=True):
        self.div = div

    def __call__(self, pic):
        if isinstance(pic, np.ndarray):
            # print(pic.shape)
            # handle numpy array
            #img = torch.from_numpy(pic).permute(2, 0, 1).contiguous()
            img = torch.from_numpy(pic).permute(3, 0, 1, 2).contiguous()
        else:
            # handle PIL Image
            img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
            img = img.view(pic.size[1], pic.size[0], len(pic.mode))
            # put it from HWC to CHW format
            # yikes, this transpose takes 80% of the loading time/CPU
            img = img.transpose(0, 1).transpose(0, 2).contiguous()
        return img.float().div(255) if self.div else img.float()
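    # e.g. (hypothetical) a (8, 224, 224, 3) uint8 array becomes a float tensor
    # of shape (3, 8, 224, 224) with values in [0, 1] when div is True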


class IdentityTransform(object):

    def __call__(self, data):
        return data


# if __name__ == "__main__":
#     trans = torchvision.transforms.Compose([
#         GroupScale(256),
#         GroupRandomCrop(224),
#         Stack(),
#         ToTorchFormatTensor(),
#         GroupNormalize(
#             mean=[.485, .456, .406],
#             std=[.229, .224, .225]
#         )]
#     )
#
#     im = Image.open('../tensorflow-model-zoo.torch/lena_299.png')
#
#     color_group = [im] * 3
#     rst = trans(color_group)
#
#     gray_group = [im.convert('L')] * 9
#     gray_rst = trans(gray_group)
#
#     trans2 = torchvision.transforms.Compose([
#         GroupRandomSizedCrop(256),
#         Stack(),
#         ToTorchFormatTensor(),
#         GroupNormalize(
#             mean=[.485, .456, .406],
#             std=[.229, .224, .225])
#     ])
#     print(trans2(color_group))

 
