语义分割实现地表建筑物识别2 数据扩增部分
主要内容
- 学习常用数据扩增方法,并用opencv和albumentations库完成具体操作
- 学习pytorch读取图像
数据扩增的意义:增加训练样本量,也可以有效增加样本的语义空间。是一种有效的正则化方法,可以防止模型过拟合。
数据扩增方法分类:
- 标签不变的数据扩增方法:数据变换后图像类别不变
- 标签变化的数据扩增方法:数据变换后图像类别变化。
对于语义分割,常规的数据扩增方法都是标签变化的扩增方法。
这次学习的数据扩增方法主要是垂直/水平翻转、裁剪、旋转等。
课后作业
1. 使用OpenCV完成图像加噪数据扩增
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import albumentations as A
def rle_decode(mask_rle, shape=(512, 512)):
    """Decode a run-length-encoded (RLE) string into a binary mask.

    The string is a whitespace-separated sequence of ``start length`` pairs.
    ``start`` is a 1-based index into the column-major (Fortran-order)
    flattened mask; ``length`` is the number of consecutive pixels set to 1.

    Parameters
    ----------
    mask_rle : str, float or None
        RLE string. A missing value (``None`` or any float NaN) yields
        ``None``; an empty string yields an all-zero mask.
    shape : tuple of int
        ``(height, width)`` of the decoded mask.

    Returns
    -------
    numpy.ndarray or None
        ``uint8`` array of the given shape holding 0/1, or ``None`` when the
        input is missing.
    """
    # A NaN is not guaranteed to be the ``np.nan`` singleton, so an identity
    # check (``is np.nan``) can miss it; test the value instead.
    if mask_rle is None:
        return None
    if isinstance(mask_rle, (float, np.floating)) and np.isnan(mask_rle):
        return None

    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int)
    lengths = np.asarray(tokens[1::2], dtype=int)
    starts -= 1  # RLE starts are 1-based; convert to 0-based offsets
    ends = starts + lengths

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # The run offsets index a column-major flattening, hence order='F'.
    return img.reshape(shape, order='F')
def fig_plot(img1, img2):
    """Show two images side by side in a single 16x8 matplotlib figure.

    ``img1`` is drawn in the left panel and ``img2`` in the right panel.
    """
    plt.figure(figsize=(16, 8))
    for panel, picture in enumerate((img1, img2), start=1):
        plt.subplot(1, 2, panel)
        plt.imshow(picture)
原始图片
# Load the tab-separated annotation table: one row per image with its file
# name and RLE-encoded mask.
train_mask = pd.read_csv('./train_mask.csv', sep='\t', names=['name', 'mask'])

# Pick one sample row, read its image and decode its mask.
i = 23557
sample_name = train_mask['name'].iloc[i]
sample_rle = train_mask['mask'].iloc[i]
img = cv2.imread('./train/' + sample_name)
mask = rle_decode(sample_rle)

# Show the image next to its mask.
fig_plot(img, mask)
翻转
# Vertical flip: flipCode=0 mirrors around the x-axis (top <-> bottom).
# Image and mask must be flipped the same way to stay aligned.
fig_plot(cv2.flip(img, 0), cv2.flip(mask, 0))
# Horizontal flip: flipCode=1 mirrors around the y-axis (left <-> right).
fig_plot(cv2.flip(img, 1), cv2.flip(mask, 1))
裁剪
# Random crop: draw a top-left corner in [0, 256) on each axis and cut the
# same 256x256 window out of both the image and the mask.
x = np.random.randint(0, 256)
y = np.random.randint(0, 256)
rows, cols = slice(x, x + 256), slice(y, y + 256)
fig_plot(img[rows, cols], mask[rows, cols])
- https://stackoverflow.com/questions/14435632/impulse-gaussian-and-salt-and-pepper-noise-with-opencv
- https://www.cnblogs.com/wmy-ncut/p/9995687.html
- https://www.cnblogs.com/mlan/p/8179028.html
添加噪声
'''
Parameters
----------
image : ndarray
Input image data. Will be converted to float.
mode : str
One of the following strings, selecting the type of noise to add:
'gauss' Gaussian-distributed additive noise.
'poisson' Poisson-distributed noise generated from the data.
's&p' Replaces random pixels with 0 or 255.
'speckle' Multiplicative noise using out = image + n*image, where
n is Gaussian noise with zero mean and unit variance.
'''
import numpy as np
import os
import cv2
def noisy(noise_typ, image):
    """Return a noisy copy of ``image``; the input array is not modified.

    Parameters
    ----------
    noise_typ : str
        Kind of noise to add:

        * ``"gauss"``   - additive Gaussian noise (mean 0, std 20).
        * ``"s&p"``     - salt-and-pepper: random pixels set to 255 or 0.
        * ``"poisson"`` - Poisson noise generated from the data.
        * ``"speckle"`` - multiplicative noise ``image + image * n`` with
          ``n`` drawn from a standard normal distribution.
    image : numpy.ndarray
        Input image, ``(H, W)`` or ``(H, W, C)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` for ``"gauss"``, the input dtype for ``"s&p"``, and a
        floating-point array for ``"poisson"`` and ``"speckle"``.

    Raises
    ------
    ValueError
        If ``noise_typ`` is not one of the supported modes.
    """
    if noise_typ == "gauss":
        mean = 0
        std = 20  # noise strength: larger values give heavier noise
        gauss = np.random.normal(mean, std, image.shape)
        # Clip before casting: an unclipped float -> uint8 cast wraps
        # out-of-range values (e.g. 260 -> 4) instead of saturating them.
        return np.clip(image + gauss, 0, 255).astype('uint8')

    if noise_typ == "s&p":
        s_vs_p = 0.5    # share of salt among the corrupted pixels
        amount = 0.004  # fraction of the image to corrupt
        out = image.copy()

        # Only (row, col) coordinates are drawn, so every channel of a
        # selected pixel is set together (white/black, not coloured specks).
        # The index must be a tuple: a list of arrays is interpreted as a
        # single index array on axis 0 by current NumPy.
        num_salt = int(np.ceil(amount * image.size * s_vs_p))
        salt = tuple(np.random.randint(0, dim, num_salt)
                     for dim in image.shape[:2])
        out[salt] = 255

        num_pepper = int(np.ceil(amount * image.size * (1. - s_vs_p)))
        pepper = tuple(np.random.randint(0, dim, num_pepper)
                       for dim in image.shape[:2])
        out[pepper] = 0
        return out

    if noise_typ == "poisson":
        # Scale by the next power of two >= number of distinct values.
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        return np.random.poisson(image * vals) / float(vals)

    if noise_typ == "speckle":
        gauss = np.random.randn(*image.shape)
        return image + image * gauss

    raise ValueError("unknown noise type: %r" % (noise_typ,))
1. 添加椒盐噪声
# Apply salt-and-pepper noise to the augmented image and show it beside the
# un-noised version.
# NOTE(review): `augments` is only assigned further down in this file (the
# albumentations section); this presumably relied on notebook cell execution
# order - confirm before running top to bottom.
img_aug = augments['image']
mask_aug = augments['mask']
noise_gauss = noisy("s&p", img_aug)
fig_plot(img_aug, noise_gauss)
2. 添加高斯噪声
# Apply Gaussian noise to the augmented image and show it beside the
# un-noised version.
# NOTE(review): `augments` is only assigned further down in this file (the
# albumentations section); this presumably relied on notebook cell execution
# order - confirm before running top to bottom.
img_aug = augments['image']
mask_aug = augments['mask']
noise_gauss = noisy("gauss", img_aug)
fig_plot(img_aug, noise_gauss)
2. 使用OpenCV完成图像旋转数据扩增
def rotate_bound(image, angle):
    """Rotate ``image`` about its centre, growing the canvas to fit the result.

    The rotation matrix is built with ``-angle`` and scale 1.0, the output
    size is computed from the rotated bounding box, and the translation is
    shifted so the rotated content is centred in the new canvas.

    Parameters
    ----------
    image : numpy.ndarray
        Input image; only its first two dimensions (height, width) are used
        for the geometry.
    angle : float
        Rotation angle passed (negated) to ``cv2.getRotationMatrix2D``.

    Returns
    -------
    numpy.ndarray
        The output of ``cv2.warpAffine`` with size ``(new_width, new_height)``.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2

    # 2x3 affine matrix for a rotation about the image centre.
    rotation = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(rotation[0, 0])
    abs_sin = np.abs(rotation[0, 1])

    # Bounding box of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Move the old centre onto the centre of the enlarged canvas.
    rotation[0, 2] += (new_width / 2) - center_x
    rotation[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, rotation, (new_width, new_height))
# Re-read the sample image, rotate it by 20 and show before/after.
i = 23557
image_name = train_mask['name'].iloc[i]
img = cv2.imread('./train/' + image_name)
img2 = rotate_bound(img, 20)
fig_plot(img, img2)
3. 使用albumentations其他的操作完成扩增操作
# Horizontal flip with probability 1, applied jointly to image and mask so
# the two stay aligned.
augments = A.HorizontalFlip(p=1)(image=img, mask=mask)
# Alternatives to try:
#   augments = A.RandomCrop(height=256, width=256, p=1)(image=img, mask=mask)
#   augments = A.ShiftScaleRotate(p=1)(image=img, mask=mask)
img_aug = augments['image']
mask_aug = augments['mask']
fig_plot(img_aug, mask_aug)

# Chain several transforms: resize to 256x256, then random horizontal and
# vertical flips (each with probability 0.5) and a random 90-degree rotation.
trfm = A.Compose(
    [
        A.Resize(256, 256),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(),
    ]
)
augments = trfm(image=img, mask=mask)
img_aug = augments['image']
mask_aug = augments['mask']
fig_plot(img_aug, mask_aug)
4. 使用pytorch完成赛题数据读取
- Dataset:数据集,对数据进行读取并进行数据扩增
- DataLoader:数据读取器,对Dataset进行封装并进行批量读取
import torch.utils.data as D
class TianChiDataset(D.Dataset):
    """Dataset yielding ``(image_tensor, mask)`` pairs for segmentation.

    Each item is read from disk with OpenCV, its mask is decoded from RLE,
    both are passed through ``transform`` together, and the image is then
    converted to a tensor.

    Parameters
    ----------
    paths : sequence of str
        Image file paths, one per sample.
    rles : sequence
        RLE-encoded masks, aligned with ``paths``.
    transform : callable
        Called as ``transform(image=..., mask=...)``; must return a mapping
        with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, paths, rles, transform):
        super().__init__()
        self.paths = paths
        self.rles = rles
        self.transform = transform
        self.len = len(paths)

    @staticmethod
    def as_tensor(image):
        """Convert an ``(H, W, C)`` uint8 image to a ``(C, H, W)`` float tensor.

        Values are scaled from [0, 255] to [0, 1]. No per-channel
        normalisation is applied.
        """
        # Local import: the file only binds ``torch.utils.data`` (as ``D``).
        import torch

        # Copy into contiguous memory; ``torch.from_numpy`` rejects the
        # negatively-strided views that flip transforms can return.
        chw = np.ascontiguousarray(image.transpose(2, 0, 1))
        return torch.from_numpy(chw).float().div(255.0)

    def __getitem__(self, index):
        path = self.paths[index]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # here with the offending path instead of deep inside transform.
            raise FileNotFoundError("cannot read image: %s" % (path,))
        mask = rle_decode(self.rles[index])
        augments = self.transform(image=img, mask=mask)
        # ``[None]`` adds a leading axis to the mask.
        return self.as_tensor(augments['image']), augments['mask'][None]

    def __len__(self):
        return self.len
# Side length (pixels) every sample is resized to.
IMAGE_SIZE = 256

# Training-time augmentation, applied jointly to image and mask.
trfm = A.Compose([
    A.Resize(IMAGE_SIZE, IMAGE_SIZE),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(),
])

dataset = TianChiDataset(
    # The CSV stores bare file names; the images live under ./train/ (as
    # used when loading the sample image above), so prefix them here.
    ('./train/' + train_mask['name']).values,
    # Missing masks become empty strings, which decode to an all-zero mask.
    train_mask['mask'].fillna('').values,
    trfm,
)

# Batches of 10 shuffled samples, loaded in the main process.
loader = D.DataLoader(
    dataset, batch_size=10, shuffle=True, num_workers=0
)
参考
- https://stackoverflow.com/questions/14435632/impulse-gaussian-and-salt-and-pepper-noise-with-opencv
- https://www.cnblogs.com/wmy-ncut/p/9995687.html
- https://www.cnblogs.com/mlan/p/8179028.html
- https://www.jianshu.com/p/9194f43fd68a