数据读取
torch.utils.data.DataLoader()
dataset: Dataset[T_co] batch_size: Optional[int] num_workers: int pin_memory: bool drop_last: bool timeout: float sampler: Union[Sampler, Iterable] pin_memory_device: str prefetch_factor: Optional[int] _iterator : Optional['_BaseDataLoaderIter'] __initialized = False
dataset:Dataset类
batch_size:批次大小
num_workers:用于读取数据的子进程数量(0表示在主进程中读取数据)
drop_last:当样本数不能被batch_size整除时,是否丢弃最后一批次的数据
torch.utils.data.Dataset()
class Dataset(Generic[T_co]):
    """Base class for datasets that are read by index.

    Subclasses are expected to override ``__getitem__``; the version here
    only raises. Two datasets can be chained with ``+``.
    """

    def __getitem__(self, index) -> T_co:
        """Return the sample stored at ``index``.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.
        """
        message = "Subclasses of Dataset should implement __getitem__."
        raise NotImplementedError(message)

    # def __getitems__(self, indices: List) -> List[T_co]:
    # Not implemented to prevent false-positives in fetcher check in
    # torch.utils.data._utils.fetch._MapDatasetFetcher

    def __add__(self, other: 'Dataset[T_co]') -> 'ConcatDataset[T_co]':
        """Return a ``ConcatDataset`` built from ``self`` followed by ``other``."""
        chained = [self, other]
        return ConcatDataset(chained)
__getitem__:接收一个索引,返回一个样本。可以使用默认方式加载索引,也可以通过定义类属性的方式自定义自己的索引,在__getitem__方法中通过索引加载数据。
class TrainDataset(Dataset):
    """Paired dataset of "r" (mixture) and "x" (source) ``.mat`` files.

    ``data_path`` must contain two index files, ``r.txt`` and ``x.txt``,
    each listing one file name per line. Names from ``r.txt`` are resolved
    as ``<data_path>/r/<name>`` and names from ``x.txt`` as
    ``<data_path>/x/<name>``. Entry ``i`` of both lists forms sample ``i``.
    """

    def __init__(self, data_path):
        """Read both index files and store the resulting path lists.

        Args:
            data_path (str): Directory holding ``r.txt`` and ``x.txt``.
        """
        self.mat_mix = self._read_index(data_path, 'r.txt', '/r/')
        self.mat_s1 = self._read_index(data_path, 'x.txt', '/x/')

    @staticmethod
    def _read_index(data_path, index_name, sub_dir):
        """Return the paths listed in ``data_path/index_name``, one per line."""
        index_file = os.path.join(data_path, index_name)
        # ``with`` closes the file; the bare ``open`` used before leaked the handle.
        with open(index_file, "r") as handle:
            # Strip only the newline: slicing off the last character would
            # truncate a final line that has no trailing newline.
            names = (line.rstrip('\n') for line in handle)
            # Empty lines carry no file name and are skipped.
            return [data_path + sub_dir + name for name in names if name]

    @staticmethod
    def _short_name(path):
        """Build the sample label from the underscore-separated parts of ``path``.

        The label is the last character of the text before the first ``_``,
        then ``_``, then the text after the last ``_``.
        """
        parts = path.split('_')
        return parts[0][-1] + '_' + parts[-1]

    def __len__(self):
        """Return the number of entries read from ``r.txt``."""
        return len(self.mat_mix)

    def __getitem__(self, item):
        """Load the pair of ``.mat`` files at position ``item``.

        Returns:
            dict: ``'r'`` and ``'x'`` hold the results of ``scio.loadmat`` for
            the two files; ``'r_name'`` and ``'x_name'`` hold their labels.
        """
        mix_path = self.mat_mix[item]
        src_path = self.mat_s1[item]
        mix = scio.loadmat(mix_path)
        src = scio.loadmat(src_path)
        return {
            'r_name': self._short_name(mix_path),
            'r': mix,
            'x_name': self._short_name(src_path),
            'x': src,
        }
数据增强
通过transforms实现对图像数据的增强
train_transform = transforms.Compose([transforms.Resize((224, 224))])
原始图片(224,224)
根据call方法,transform方法输入一个返回一个,当前方法的输入是上一方法的输出。
class Compose:
    """Chain several transforms so that each one receives the previous one's output."""

    def __init__(self, transforms):
        """Store the transform list; usage is logged only outside scripting/tracing."""
        if not (torch.jit.is_scripting() or torch.jit.is_tracing()):
            _log_api_usage_once(self)
        self.transforms = transforms

    def __call__(self, img):
        """Run ``img`` through every stored transform in order and return the result."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result

    def __repr__(self) -> str:
        """List the class name followed by one transform per line."""
        listing = "".join(f"\n {step}" for step in self.transforms)
        return f"{self.__class__.__name__}({listing}\n)"
一、裁剪
transforms.CenterCrop()
class CenterCrop(torch.nn.Module):
    """Crop an image around its centre to a fixed size."""

    def __init__(self, size):
        """Normalise ``size`` through ``_setup_size`` and keep it on the instance."""
        super().__init__()
        _log_api_usage_once(self)
        self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")

    def forward(self, img):
        """Centre-crop ``img``.

        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: The result of ``F.center_crop`` for the stored size.
        """
        target_size = self.size
        return F.center_crop(img, target_size)

    def __repr__(self) -> str:
        """Show the class name and the configured size."""
        return "{}(size={})".format(self.__class__.__name__, self.size)
从图像中心裁剪图像
size:剪切后的大小,比原始尺寸小时直接剪切
比原始尺寸大时进行填充
transforms.RandomCrop()
class RandomCrop(torch.nn.Module):
    """Cut a fixed-size patch out of an image at a random position.

    Tensor inputs are expected to be shaped [..., H, W], with any number of
    leading dimensions; with non-constant padding at most 2 leading
    dimensions are expected.

    Args:
        size (sequence or int): Output size of the crop. An int, or a
            sequence of length 1, yields a square crop; otherwise (h, w).
        padding (int or sequence, optional): Padding applied to the borders
            before cropping. Default is None. A single int pads every
            border; a sequence of length 2 pads left/right and top/bottom;
            a sequence of length 4 pads left, top, right and bottom.

            .. note::
                In torchscript mode padding as single int is not supported,
                use a sequence of length 1: ``[padding, ]``.
        pad_if_needed (boolean): Pad images that are smaller than the
            requested size instead of raising. Because the crop happens
            after padding, the padding appears at a random offset.
        fill (number or tuple): Fill value for constant padding. Default is
            0. A tuple of length 3 fills the R, G and B channels. Only used
            when ``padding_mode`` is constant. Tensors accept only a number;
            PIL images accept an int or a tuple.
        padding_mode (str): One of constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pads with the value given by ``fill``.
            - edge: repeats the last value at the edge of the image. For a
              5D tensor the last 3 dimensions are padded instead of the last 2.
            - reflect: mirrors the image without repeating the edge value,
              e.g. padding [1, 2, 3, 4] by 2 on both sides gives
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: mirrors the image and repeats the edge value,
              e.g. padding [1, 2, 3, 4] by 2 on both sides gives
              [2, 1, 1, 2, 3, 4, 4, 3].
    """

    @staticmethod
    def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
        """Draw the crop window for a random crop.

        Args:
            img (PIL Image or Tensor): Image to be cropped.
            output_size (tuple): Requested (height, width) of the crop.

        Returns:
            tuple: (i, j, h, w) suitable for ``crop``.

        Raises:
            ValueError: if the requested size exceeds the image size.
        """
        _, img_h, img_w = F.get_dimensions(img)
        crop_h, crop_w = output_size

        if img_h < crop_h or img_w < crop_w:
            raise ValueError(f"Required crop size {(crop_h, crop_w)} is larger than input image size {(img_h, img_w)}")

        # Exact fit: the only possible window is the whole image, no draw needed.
        if img_w == crop_w and img_h == crop_h:
            return 0, 0, img_h, img_w

        # Row offset is drawn before the column offset.
        top = torch.randint(0, img_h - crop_h + 1, size=(1,)).item()
        left = torch.randint(0, img_w - crop_w + 1, size=(1,)).item()
        return top, left, crop_h, crop_w

    def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
        """Record the crop size and padding options."""
        super().__init__()
        _log_api_usage_once(self)

        self.size = tuple(_setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))

        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def forward(self, img):
        """Pad ``img`` as configured, then crop it at a random location.

        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        # Dimensions are measured once, after the explicit padding above.
        _, height, width = F.get_dimensions(img)

        if self.pad_if_needed:
            missing_w = self.size[1] - width
            if missing_w > 0:
                # pad the width if needed
                img = F.pad(img, [missing_w, 0], self.fill, self.padding_mode)
            missing_h = self.size[0] - height
            if missing_h > 0:
                # pad the height if needed
                img = F.pad(img, [0, missing_h], self.fill, self.padding_mode)

        top, left, crop_h, crop_w = self.get_params(img, self.size)
        return F.crop(img, top, left, crop_h, crop_w)

    def __repr__(self) -> str:
        """Show the class name with the size and padding settings."""
        return "{}(size={}, padding={})".format(self.__class__.__name__, self.size, self.padding)
size:裁剪后的尺寸
padding:当padding=a时上下左右均填充a个像素
transforms.RandomCrop(224, padding=16)
当padding=(a,b)时,左右填充a个像素,上下填充b个像素
transforms.RandomCrop(224, padding=(16, 64))
当padding=(a,b,c,d)时左、上、右、下分别填充a、b、c、d个像素
padding_mode:填充模式,4种:‘constant’、‘edge’、‘reflect’、‘symmetric’,
constant:单一像素,像素颜色由fill决定,
transforms.RandomCrop(224, padding=16, fill=(255, 0, 0))
edge:边缘像素,
transforms.RandomCrop(224, padding=64, padding_mode='edge'),
reflect:不包含边缘的镜像,比如[1,2,3]-->[3,2,1,2,3,2,1]
transforms.RandomCrop(224, padding=64, padding_mode='reflect'),
symmetric:包含边缘的镜像,比如[1,2,3]-->[3,2,1,1,2,3,3,2,1]
transforms.RandomCrop(224, padding=64, padding_mode='symmetric')
transforms.RandomResizedCrop()
class RandomResizedCrop(torch.nn.Module):
    """Crop a random portion of image and resize it to a given size.

    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions

    A crop of the original image is made: the crop has a random area (H * W)
    and a random aspect ratio. This crop is finally resized to the given
    size. This is popularly used to train the Inception networks.

    Args:
        size (int or sequence): expected output size of the crop, for each edge. If size is an
            int instead of sequence like (h, w), a square output size ``(size, size)`` is
            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).

            .. note::
                In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop,
            before resizing. The scale is defined with respect to the area of the original image.
        ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before
            resizing.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.NEAREST_EXACT``,
            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
            The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
        antialias (bool, optional): Whether to apply antialiasing.
            It only affects **tensors** with bilinear or bicubic modes and it is
            ignored otherwise: on PIL images, antialiasing is always applied on
            bilinear or bicubic modes; on other modes (for PIL images and
            tensors), antialiasing makes no sense and this parameter is ignored.
            Possible values are:

            - ``True``: will apply antialiasing for bilinear or bicubic modes.
              Other mode aren't affected. This is probably what you want to use.
            - ``False``: will not apply antialiasing for tensors on any mode. PIL
              images are still antialiased on bilinear or bicubic modes, because
              PIL doesn't support no antialias.
            - ``None``: equivalent to ``False`` for tensors and ``True`` for
              PIL images. This value exists for legacy reasons and you probably
              don't want to use it unless you really know what you are doing.

            The current default is ``None`` **but will change to** ``True`` **in
            v0.17** for the PIL and Tensor backends to be consistent.
    """

    # NOTE(review): the docstring above describes the default of ``antialias``
    # as ``None``, while the signature below defaults to the string "warn" —
    # confirm how the resize helper interprets "warn".
    def __init__(
        self,
        size,
        scale=(0.08, 1.0),
        ratio=(3.0 / 4.0, 4.0 / 3.0),
        interpolation=InterpolationMode.BILINEAR,
        antialias: Optional[Union[str, bool]] = "warn",
    ):
        """Validate and store the crop configuration.

        Raises:
            TypeError: if ``scale`` or ``ratio`` is not a sequence.
        """
        super().__init__()
        _log_api_usage_once(self)
        self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")

        if not isinstance(scale, Sequence):
            raise TypeError("Scale should be a sequence")
        if not isinstance(ratio, Sequence):
            raise TypeError("Ratio should be a sequence")
        # A reversed (max, min) pair only triggers a warning; it is still stored as given.
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("Scale and ratio should be of kind (min, max)")

        # Integer interpolation codes are converted before being stored.
        if isinstance(interpolation, int):
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img: Tensor, scale: List[float], ratio: List[float]) -> Tuple[int, int, int, int]:
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image or Tensor): Input image.
            scale (list): range of scale of the origin size cropped
            ratio (list): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
            sized crop.
        """
        _, height, width = F.get_dimensions(img)
        area = height * width

        # The aspect ratio is drawn uniformly in log space and exponentiated below.
        log_ratio = torch.log(torch.tensor(ratio))
        # Up to 10 attempts; the first candidate that fits inside the image is returned.
        for _ in range(10):
            target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
            aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item()

            # Solve w * h = target_area and w / h = aspect_ratio for w and h.
            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                i = torch.randint(0, height - h + 1, size=(1,)).item()
                j = torch.randint(0, width - w + 1, size=(1,)).item()
                return i, j, h, w

        # Fallback to central crop
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            # Image is narrower than the smallest allowed ratio: keep the full width.
            w = width
            h = int(round(w / min(ratio)))
        elif in_ratio > max(ratio):
            # Image is wider than the largest allowed ratio: keep the full height.
            h = height
            w = int(round(h * max(ratio)))
        else:  # whole image
            w = width
            h = height
        # Centre the fallback window inside the image.
        i = (height - h) // 2
        j = (width - w) // 2
        return i, j, h, w

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be cropped and resized.

        Returns:
            PIL Image or Tensor: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        return F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias)

    def __repr__(self) -> str:
        """Return the class name with size, scale, ratio, interpolation and antialias."""
        interpolate_str = self.interpolation.value
        format_string = self.__class__.__name__ + f"(size={self.size}"
        # scale and ratio are rounded to 4 decimals for display only.
        format_string += f", scale={tuple(round(s, 4) for s in self.scale)}"
        format_string += f", ratio={tuple(round(r, 4) for r in self.ratio)}"
        format_string += f", interpolation={interpolate_str}"
        format_string += f", antialias={self.antialias})"
        return format_string
size:裁剪后图片大小
scale:随机裁剪面积的比例,默认(0.08,1)
ratio:随机长宽比,默认(3/4,4/3)
interpolation:插值方法
transforms.RandomResizedCrop(size=224, scale=(0.5, 0.5))
transforms.FiveCrop()
在左上、右上、左下、右下四个角及中心各裁剪出一张size大小的图像,共5张
transforms.TenCrop()
除FiveCrop的图像外,进一步对FiveCrop的图像进行翻转
size:剪切后的图像大小
vertical_flip:True(垂直翻转)/False(水平翻转)
二、旋转
transforms.RandomHorizontalFlip()
transforms.RandomHorizontalFlip(p=1)
p:概率,依照概率水平翻转图片
transforms.RandomVerticalFlip()
transforms.RandomVerticalFlip(p=1)
p:概率,依照概率垂直翻转图像
transforms.RandomRotation()
transforms.RandomRotation(30, center=(0, 0), expand=True)
degrees:旋转角度
center:旋转中心
resample:重采样(插值)方法,新版本torchvision中该参数已更名为interpolation
expand:是否扩张图片
三、图像变换
transforms.Pad()
transforms.Pad(padding=(8, 16, 32, 64), padding_mode='symmetric')
padding:(a,b,c,d)左上右下的填充像素
padding_mode:4种:‘constant’、‘edge’、‘reflect’、‘symmetric’
transforms.ColorJitter()
transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.3)
brightness:亮度,当输入为a时,在[max(0,1-a), 1+a]中随机选取亮度;当输入为(a,b)时,在[a,b]之间选取亮度
contrast:对比度,设置如亮度
saturation:饱和度,设置如亮度
hue:色相,当为a时,从[-a,a]之间选择,a在[0,0.5]之间;当为(a,b)时,在[a,b]之间选择,且-0.5<=a<=b<=0.5
transforms.RandomGrayscale()
transforms.RandomGrayscale(p=1)
p:概率,依据概率将图片转化为灰度图
transforms.RandomAffine()
def __init__( self, degrees, translate=None, scale=None, shear=None, interpolation=InterpolationMode.NEAREST, fill=0, center=None, ):
shear:斜切
transforms.RandomAffine(degrees=0, shear=90, fill=(255, 0, 0))
transforms.RandomErasing()
transforms.ToTensor(), transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=(254/255, 0, 0))
self.p = p
self.scale = scale
self.ratio = ratio
self.value = value
self.inplace = inplace
p:执行遮挡的概率
scale:遮挡区域面积占整幅图像面积的比例范围
ratio:遮挡区域长宽比
value:遮挡区域的颜色
四、transform操作
transforms.RandomChoice()
class RandomChoice(RandomTransforms):
    """Apply exactly one transform, drawn at random from a list.

    This transform does not support torchscript.
    """

    def __init__(self, transforms, p=None):
        """Store the transforms and the optional selection weights ``p``.

        Raises:
            TypeError: if ``p`` is given but is not a sequence.
        """
        super().__init__(transforms)
        if not (p is None or isinstance(p, Sequence)):
            raise TypeError("Argument p should be a sequence")
        self.p = p

    def __call__(self, *args):
        """Pick one transform, weighted by ``p``, and call it with ``args``."""
        (chosen,) = random.choices(self.transforms, weights=self.p)
        return chosen(*args)

    def __repr__(self) -> str:
        """Append the weights to the parent class representation."""
        base = super().__repr__()
        return f"{base}(p={self.p})"
transforms.RandomChoice([transforms.RandomVerticalFlip(p=1),transforms.RandomHorizontalFlip(p=1)])
在包含的transforms方法中任选一个
transforms.RandomApply()
class RandomApply(torch.nn.Module):
    """Run a whole list of transforms with probability ``p``, or none of them.

    .. note::
        To script this transform, pass the transforms as a
        ``torch.nn.ModuleList`` rather than a list/tuple:

        >>> transforms = transforms.RandomApply(torch.nn.ModuleList([
        >>>     transforms.ColorJitter(),
        >>> ]), p=0.3)
        >>> scripted_transforms = torch.jit.script(transforms)

        Use only scriptable transformations, i.e. ones that work with
        ``torch.Tensor`` and do not require `lambda` functions or ``PIL.Image``.

    Args:
        transforms (sequence or torch.nn.Module): list of transformations
        p (float): probability
    """

    def __init__(self, transforms, p=0.5):
        """Keep the transform list and the probability on the instance."""
        super().__init__()
        _log_api_usage_once(self)
        self.transforms = transforms
        self.p = p

    def forward(self, img):
        """Return ``img`` untouched when the random draw exceeds ``p``, else transformed."""
        draw = torch.rand(1)
        if self.p < draw:
            return img
        result = img
        for step in self.transforms:
            result = step(result)
        return result

    def __repr__(self) -> str:
        """List the class name, the probability and one transform per line."""
        pieces = [self.__class__.__name__ + "(", f"\n p={self.p}"]
        pieces.extend(f"\n {step}" for step in self.transforms)
        pieces.append("\n)")
        return "".join(pieces)
transforms.RandomApply([transforms.RandomAffine(degrees=0, shear=45, fill=(255, 0, 0)), transforms.Grayscale(num_output_channels=3)], p=0.5)
p:依据概率执行一组transforms方法
transforms.RandomOrder()
transforms.RandomOrder([transforms.RandomRotation(15), transforms.Pad(padding=32), transforms.RandomAffine(degrees=0, translate=(0.01, 0.1), scale=(0.9, 1.1))])
对一组transforms方法打乱顺序执行
五、自定义transforms方法
1.继承类
2.在init中定义属性
3.在call中定义方法
class NAME(object):
    """Template for a custom transform: one-line summary goes here.

    Every ``xx``/``xxxx`` name in this class is a placeholder to be replaced.

    Args:
        xx: xxxxxx
        xx: xxxxxx
    """

    def __init__(self, xx_1 , xx_2 = default_value):
        """Store the transform's settings as attributes."""
        assert xxxxxxx # placeholder: assertion that validates the arguments
        self.xx_1 = xx_1
        self.xx_2 = xx_2

    def __call__(self, img):
        """Apply the transform to ``img`` (placeholder body).

        Args:
            xxxxx

        Returns:
            xxxx
        """
        xxxx
        return xxx
class AddPepperNoise(object):
    """Add salt-and-pepper noise to an image.

    Args:
        snr (float): Fraction of pixels left unchanged. The remaining
            ``1 - snr`` is split evenly between salt (255) and pepper (0).
        p (float): Probability of applying the noise on a given call.
    """

    def __init__(self, snr, p=0.9):
        """Validate and store the noise settings.

        Raises:
            AssertionError: if ``snr`` or ``p`` is not a real number.
        """
        # Both values must be numeric (2020-07-26: "or" was corrected to "and").
        # Integers are accepted as well, so that e.g. ``p=1`` works.
        assert isinstance(snr, (int, float)) and isinstance(p, (int, float))
        self.snr = snr
        self.p = p

    def __call__(self, img):
        """Return a noisy RGB copy of ``img`` with probability ``p``, else ``img`` itself.

        Args:
            img (PIL Image): PIL Image

        Returns:
            PIL Image: PIL image.
        """
        if random.uniform(0, 1) >= self.p:
            return img

        # np.array already returns a fresh copy, so the input is not modified.
        pixels = np.array(img)
        # Use only the first two axes so 2-D (single-channel) arrays work too.
        height, width = pixels.shape[:2]

        signal_pct = self.snr
        noise_pct = 1 - self.snr
        # One label per pixel: 0 = keep, 1 = salt, 2 = pepper.
        mask = np.random.choice((0, 1, 2), size=(height, width),
                                p=[signal_pct, noise_pct / 2., noise_pct / 2.])
        # A 2-D boolean mask selects whole pixels, i.e. every channel at once.
        pixels[mask == 1] = 255  # salt noise
        pixels[mask == 2] = 0  # pepper noise
        return Image.fromarray(pixels.astype('uint8')).convert('RGB')