对比 torchvision 的优势
1. Faster than torchvision on every batch
2. Supports more tasks, such as segmentation and detection, which are much harder to do in torchvision
images: 示例图片目录(elon.jpeg、mask.jpeg、second_mask.jpeg、cat.jpg)
cat_dogs: 数据集目录,每个类别对应一个子文件夹,子文件夹内存放该类别的图片文件
1.classification.py
import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image
# Classification demo: produce 15 randomly augmented variants of one picture
# and plot them next to the original.

# The untouched PIL image is the first gallery entry so the original can be
# compared with its augmented versions.
image = Image.open("images/elon.jpeg")
images_list = [image]

# Augmentation pipeline; the steps run top to bottom on every call.
transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        # limit=40: the rotation angle is drawn from [-40, 40] degrees;
        # p=0.9: the rotation is applied to 90% of the samples.
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        # Always apply one of the two transforms listed below.
        A.OneOf(
            [
                A.Blur(blur_limit=3, p=0.5),
                A.ColorJitter(p=0.5),
            ],
            p=1.0,
        ),
    ]
)

# Albumentations works on NumPy arrays, not PIL images.
image = np.array(image)

# Every call re-samples the random parameters, so each variant is different.
images_list.extend(transform(image=image)["image"] for _ in range(15))

plot_examples(images_list)
2.segmentation.py
import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image
# Segmentation demo: augment one picture together with its two masks so that
# every geometric change is applied identically to the image and both masks.

image = Image.open("images/elon.jpeg")
mask = Image.open("images/mask.jpeg")
mask2 = Image.open("images/second_mask.jpeg")

# The untouched PIL image opens the gallery for reference.
images_list = [image]

# Augmentation pipeline; the steps run top to bottom on every call.
transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        # Always apply one of the two transforms listed below.
        A.OneOf(
            [
                A.Blur(blur_limit=3, p=0.5),
                A.ColorJitter(p=0.5),
            ],
            p=1.0,
        ),
    ]
)

# Albumentations works on NumPy arrays (np.asarray would work here as well).
image = np.array(image)
mask = np.array(mask)
mask2 = np.array(mask2)

for _ in range(4):
    result = transform(image=image, masks=[mask, mask2])
    new_masks = result["masks"]
    # Gallery layout per round: augmented image, first mask, second mask.
    images_list += [result["image"], new_masks[0], new_masks[1]]

plot_examples(images_list)
3.detection.py
import cv2
import albumentations as A
import numpy as np
from utils import plot_examples
from PIL import Image
# Detection demo: augment one picture together with its bounding box and plot
# every augmented sample in which the box survived.

image_path = "images/cat.jpg"
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of an opaque cvtColor error.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; convert to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# A list of boxes, each box itself a list of coordinates.
# pascal_voc format is (x_min, y_min, x_max, y_max); YOLO and COCO are the
# other common formats.
bboxes = [[13, 170, 224, 410]]

transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        # Always apply one of the two transforms listed below.
        A.OneOf(
            [
                A.Blur(blur_limit=3, p=0.5),
                A.ColorJitter(p=0.5),
            ],
            p=1.0,
        ),
    ],
    # Boxes that end up too small (min_area) or mostly cropped away
    # (min_visibility) are dropped from the output. No label fields are
    # passed alongside the boxes.
    bbox_params=A.BboxParams(
        format="pascal_voc",
        min_area=2048,
        min_visibility=0.3,
        label_fields=[],
    ),
)

# The original image and its box open both galleries for reference.
images_list = [image]
saved_bboxes = [bboxes[0]]

for _ in range(15):
    augmentations = transform(image=image, bboxes=bboxes)
    augmented_bboxes = augmentations["bboxes"]
    # The box was filtered out by bbox_params: nothing to draw, skip sample.
    if len(augmented_bboxes) == 0:
        continue
    images_list.append(augmentations["image"])
    saved_bboxes.append(augmented_bboxes[0])

plot_examples(images_list, saved_bboxes)
4.full_pytorch_example.py
import torch
import numpy as np
import cv2
from PIL import Image
import torch.nn as nn
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset
import os
class ImageFolder(Dataset):
    """Image-classification dataset backed by one sub-directory per class.

    ``root_dir`` must contain one directory per class; every file inside a
    class directory is treated as an image of that class. The label of a
    sample is the index of its class directory in ``class_names``.

    Args:
        root_dir: Path of the directory holding the class sub-directories.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with the result under the ``"image"`` key.
    """

    def __init__(self, root_dir, transform=None):
        super().__init__()
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sort so that the
        # class -> label mapping is identical on every run and machine.
        # Entries that are not directories (e.g. .DS_Store) are skipped.
        self.class_names = sorted(
            name
            for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        # List of (file name, label) pairs, e.g.
        # [("cat_0.jpg", 0), ..., ("dog_0.jpg", 1)].
        self.data = []
        for index, name in enumerate(self.class_names):
            files = sorted(os.listdir(os.path.join(root_dir, name)))
            self.data.extend((file_name, index) for file_name in files)

    def __len__(self):
        """Return the number of samples found under ``root_dir``."""
        return len(self.data)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, label)``.

        The image is read as an RGB NumPy array and, when a transform was
        given, replaced by the transform's ``"image"`` output.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        img_file, label = self.data[index]
        image_path = os.path.join(
            self.root_dir, self.class_names[label], img_file
        )
        # Close the file promptly and force three channels so that grayscale,
        # palette or RGBA files do not break channel-wise transforms.
        with Image.open(image_path) as img:
            image = np.array(img.convert("RGB"))

        if self.transform is not None:
            # Extra targets such as bboxes=... could be passed here as well.
            augmentations = self.transform(image=image)
            image = augmentations["image"]

        return image, label
# Build the augmentation pipeline, wrap the cat_dogs folder in an ImageFolder
# dataset and print the shape of every augmented sample.

augmentation_steps = [
    A.Resize(width=1920, height=1080),
    A.RandomCrop(width=1280, height=720),
    A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.1),
    A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
    # Always apply one of the two transforms listed below.
    A.OneOf(
        [
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ],
        p=1.0,
    ),
    # torchvision order is ToTensor() followed by Normalize(mean, std); here
    # normalisation comes first and the tensor conversion is the last step.
    # With mean 0, std 1 and max_pixel_value 255 this divides pixels by 255.
    A.Normalize(
        mean=[0, 0, 0],
        std=[1, 1, 1],
        max_pixel_value=255,
    ),
    ToTensorV2(),
]
transform = A.Compose(augmentation_steps)

dataset = ImageFolder(root_dir="cat_dogs", transform=transform)

# The label is not needed for this shape check.
for sample, _label in dataset:
    print(sample.shape)