在 PyTorch 1.11 中做图像深度学习时,把数据增强这一部分记录一下。主要通过“旋转”变换来学习 class(类)的使用和 def(函数)的写法。
import random
import scipy.ndimage as ndi
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as imgplt
def transform_matrix_offset_center(matrix, x, y):
    """Re-centre a 3x3 homogeneous transform on the middle of an image.

    The returned matrix first moves the image centre to the origin, then
    applies ``matrix``, then moves the origin back to the image centre.

    Args:
        matrix: 3x3 homogeneous transform defined around the origin.
        x: Image extent along the first transformed axis.
        y: Image extent along the second transformed axis.

    Returns:
        The 3x3 matrix ``offset @ matrix @ reset``.
    """
    # Pixel indices run 0..n-1, so the geometric centre is (n - 1) / 2,
    # i.e. n / 2 - 0.5.  Using n / 2 + 0.5 pivots one pixel off-centre.
    o_x = float(x) / 2 - 0.5
    o_y = float(y) / 2 - 0.5
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    return np.dot(np.dot(offset_matrix, matrix), reset_matrix)
def apply_transform(x,
                    transform_matrix,
                    channel_index=2,
                    fill_mode='nearest',
                    cval=0.,
                    order=1):
    """Apply one affine transform to every channel of an image array.

    Args:
        x: 3-D image array; each slice along ``channel_index`` is warped as
            an independent 2-D image.
        transform_matrix: 3x3 homogeneous matrix. Its upper-left 2x2 block
            and the first two entries of its last column are handed to
            ``scipy.ndimage.affine_transform`` as ``matrix`` and ``offset``.
        channel_index: Axis of ``x`` that holds the channels (may be
            negative).
        fill_mode: ``mode`` argument of ``affine_transform``.
        cval: ``cval`` argument of ``affine_transform``.
        order: Spline interpolation order passed to ``affine_transform``.

    Returns:
        The transformed array with the same axis layout as ``x``.
    """
    # Bring channels to the front so iterating yields one 2-D plane each.
    channels_first = np.moveaxis(x, channel_index, 0)
    linear_part = transform_matrix[:2, :2]
    offset = transform_matrix[:2, 2]
    # Call affine_transform from scipy.ndimage directly; the
    # scipy.ndimage.interpolation namespace is deprecated.
    warped = [
        ndi.affine_transform(plane,
                             linear_part,
                             offset,
                             order=order,
                             mode=fill_mode,
                             cval=cval)
        for plane in channels_first
    ]
    # moveaxis restores the layout for negative channel indices as well.
    return np.moveaxis(np.stack(warped, axis=0), 0, channel_index)
def rotation(x,
             rg=20,
             is_random=False,
             row_index=0,
             col_index=1,
             channel_index=2,
             fill_mode='nearest',
             cval=0.,
             order=1):
    """Rotation transform: rotate an image about its centre.

    Args:
        x: Image array to rotate.
        rg: Angle in degrees. Used as-is when ``is_random`` is false;
            otherwise the angle is drawn uniformly from ``[-rg, rg]``.
        is_random: Whether to sample the angle instead of using ``rg``.
        row_index: Axis of ``x`` holding image rows.
        col_index: Axis of ``x`` holding image columns.
        channel_index: Axis of ``x`` holding channels.
        fill_mode: Forwarded to ``apply_transform``.
        cval: Forwarded to ``apply_transform``.
        order: Forwarded to ``apply_transform``.

    Returns:
        The rotated image as returned by ``apply_transform``.
    """
    degrees = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * degrees

    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rotation_matrix = np.array(
        [[cos_t, -sin_t, 0],
         [sin_t, cos_t, 0],
         [0, 0, 1]])

    height = x.shape[row_index]
    width = x.shape[col_index]
    # Make the rotation pivot on the image centre rather than the origin.
    centred = transform_matrix_offset_center(rotation_matrix, height, width)
    return apply_transform(x, centred, channel_index, fill_mode, cval, order)
def image_augmentation(img,
                       row_index=0,
                       col_index=1,
                       channel_index=2,
                       is_rotate=False,
                       is_crop=False,
                       is_flip=False,
                       is_shift=False):
    """Data augmentation: optionally apply a random rotation to an image.

    Args:
        img: Image array.
        row_index: Axis of ``img`` holding image rows.
        col_index: Axis of ``img`` holding image columns.
        channel_index: Axis of ``img`` holding channels.
        is_rotate: When true, the image is rotated with probability 1/2 by
            an angle drawn uniformly from [-10, 10] degrees.
        is_crop: Accepted for interface compatibility; currently ignored.
        is_flip: Accepted for interface compatibility; currently ignored.
        is_shift: Accepted for interface compatibility; currently ignored.

    Returns:
        The rotated image, or ``img`` itself when no rotation was applied.
    """
    # No shape lookups here: nothing below needs them, and indexing
    # img.shape[channel_index] would reject 2-D images even when no
    # augmentation is requested.
    if is_rotate and random.randint(0, 1) == 0:
        img = rotation(img,
                       rg=10,
                       is_random=True,
                       row_index=row_index,
                       col_index=col_index,
                       channel_index=channel_index)
    return img
def fun(img):
    """Run ``image_augmentation`` on ``img`` with only rotation enabled.

    Returns:
        Whatever ``image_augmentation(img, is_rotate=True)`` returns.
    """
    return image_augmentation(img, is_rotate=True)
# Read the original image.  matplotlib's imread takes the image format as its
# optional second argument, so no cv2-style integer flag is passed here.
img_0 = imgplt.imread('C:\\Users\\Administrator\\Desktop\\梅花.jpg')
print('img_0.shape:', '\n', img_0.shape)
plt.imshow(img_0)
plt.show()
# Rotation
img_1 = fun(img=img_0)
print('img_1.shape:', '\n', img_1.shape)
plt.imshow(img_1)
plt.show()
# Resize to 668x668.  The third positional parameter of cv2.resize is the
# output array `dst`, not a scale factor, and fx/fy are not used when an
# explicit dsize is given, so only dsize is passed.
img_2 = cv2.resize(img_0, (668, 668))
print('img_2.shape:', '\n', img_2.shape)
plt.imshow(img_2)
plt.show()
# Reorder the axes from (rows, cols, channels) to (channels, rows, cols).
# The result is only printed: plt.imshow expects the channel axis last.
img_3 = np.transpose(img_0, (2, 0, 1))
print('img_3.shape:', '\n', img_3.shape)
程序运行后的结果:
总结
1 图像变换的基本操作
在运行过程中,如果用 cv2.imread() 读取图片,再通过 plt.imshow()、plt.show() 来显示,图片的颜色会显示有误(cv2.imread() 返回的是 BGR 通道顺序,而 matplotlib 按 RGB 顺序解释),如下图:
import cv2
import matplotlib.pyplot as plt
# Read the original image with OpenCV (1 is passed as imread's flag argument).
# NOTE(review): cv2.imread is documented to return channels in BGR order,
# while plt.imshow treats a 3-channel array as RGB -- presumably the cause of
# the wrong display this snippet is meant to demonstrate.
img_0 = cv2.imread('C:\\Users\\Administrator\\Desktop\\1.jpg', 1)
print('img_0.shape:', '\n', img_0.shape)
plt.imshow(img_0)
plt.show()
解决方法:
参考链接:matplotlib读取图片并显示
引入 import matplotlib.image as imgplt,并改用 imgplt.imread() 读取图片,则显示正常,如下图:
import matplotlib.pyplot as plt
import matplotlib.image as imgplt
# Read the original image with matplotlib.  The optional second argument of
# matplotlib.image.imread is the image format, not a cv2-style colour flag,
# so it is omitted and the format is taken from the file extension.
img_0 = imgplt.imread('C:\\Users\\Administrator\\Desktop\\1.jpg')
print('img_0.shape:', '\n', img_0.shape)
plt.imshow(img_0)
plt.show()
2 通过“旋转”变换来学习 class(类)的使用和 def(函数)的写法
import random
import scipy.ndimage as ndi
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as imgplt
def transform_matrix_offset_center(matrix, x, y):
    """Make a 3x3 homogeneous transform act about the centre of an image.

    Args:
        matrix: 3x3 homogeneous transform defined around the origin.
        x: Image extent along the first transformed axis.
        y: Image extent along the second transformed axis.

    Returns:
        ``offset @ matrix @ reset``: translate the image centre to the
        origin, apply ``matrix``, then translate back.
    """
    # The centre of pixel indices 0..n-1 is (n - 1) / 2 = n / 2 - 0.5;
    # n / 2 + 0.5 would place the pivot one pixel past the true centre.
    o_x = float(x) / 2 - 0.5
    o_y = float(y) / 2 - 0.5
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    return np.dot(np.dot(offset_matrix, matrix), reset_matrix)
def apply_transform(x,
                    transform_matrix,
                    channel_index=2,
                    fill_mode='nearest',
                    cval=0.,
                    order=1):
    """Warp each channel of an image array with the same affine transform.

    Args:
        x: 3-D image array; every slice along ``channel_index`` is treated
            as a separate 2-D image.
        transform_matrix: 3x3 homogeneous matrix; its upper-left 2x2 block
            is used as the ``matrix`` argument and the first two entries of
            its last column as the ``offset`` argument of
            ``scipy.ndimage.affine_transform``.
        channel_index: Axis of ``x`` that holds the channels (may be
            negative).
        fill_mode: ``mode`` argument of ``affine_transform``.
        cval: ``cval`` argument of ``affine_transform``.
        order: Spline interpolation order passed to ``affine_transform``.

    Returns:
        The transformed array, laid out like ``x``.
    """
    planes = np.moveaxis(x, channel_index, 0)
    final_affine_matrix = transform_matrix[:2, :2]
    final_offset = transform_matrix[:2, 2]
    # scipy.ndimage.interpolation is a deprecated namespace, so the function
    # is taken from scipy.ndimage itself.
    channel_images = [
        ndi.affine_transform(plane,
                             final_affine_matrix,
                             final_offset,
                             order=order,
                             mode=fill_mode,
                             cval=cval)
        for plane in planes
    ]
    stacked = np.stack(channel_images, axis=0)
    # moveaxis also puts channels back correctly for negative channel_index.
    return np.moveaxis(stacked, 0, channel_index)
def rotation(x,
             rg=20,
             is_random=False,
             row_index=0,
             col_index=1,
             channel_index=2,
             fill_mode='nearest',
             cval=0.,
             order=1):
    """Rotate an image about its centre.

    Args:
        x: Image array to rotate.
        rg: Angle in degrees; with ``is_random`` set, the angle is instead
            sampled uniformly from ``[-rg, rg]``.
        is_random: Whether to sample the angle.
        row_index: Axis of ``x`` holding image rows.
        col_index: Axis of ``x`` holding image columns.
        channel_index: Axis of ``x`` holding channels.
        fill_mode: Forwarded to ``apply_transform``.
        cval: Forwarded to ``apply_transform``.
        order: Forwarded to ``apply_transform``.

    Returns:
        The rotated image as returned by ``apply_transform``.
    """
    angle_deg = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * angle_deg

    c, s = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[c, -s, 0],
                                [s, c, 0],
                                [0, 0, 1]])

    n_rows, n_cols = x.shape[row_index], x.shape[col_index]
    # Shift the pivot from the origin to the image centre.
    about_centre = transform_matrix_offset_center(rotation_matrix, n_rows, n_cols)
    return apply_transform(x, about_centre, channel_index, fill_mode, cval, order)
class MyReader:
    """Example reader class that augments an image and displays it.

    ``iscolor`` is stored on the instance by ``__init__``; no method of this
    class reads it.
    """

    def __init__(self, iscolor=True):
        self.iscolor = iscolor

    def img_augmentation(self, img, row_index=0, col_index=1, channel_index=2, is_rotate=False):
        """Optionally apply a random rotation to ``img``.

        Args:
            img: Image array.
            row_index: Axis of ``img`` holding image rows.
            col_index: Axis of ``img`` holding image columns.
            channel_index: Axis of ``img`` holding channels.
            is_rotate: When true, the image is rotated with probability 1/2
                by an angle drawn uniformly from [-10, 10] degrees.

        Returns:
            The rotated image, or ``img`` itself when no rotation happened.
        """
        if is_rotate and random.randint(0, 1) == 0:
            img = rotation(img,
                           rg=10,
                           is_random=True,
                           row_index=row_index,
                           col_index=col_index,
                           channel_index=channel_index)
        return img

    def train_reader(self, img):
        """Augment ``img``, show it with matplotlib, print its shape, return it.

        Args:
            img: Image array. NOTE(review): assumed to be in RGB(A) channel
                order, as produced by matplotlib's imread -- confirm against
                callers.

        Returns:
            The image after colour conversion and optional rotation.
        """
        self.img = img
        # cvtColor needs a colour-conversion code; cv2.IMREAD_COLOR is an
        # imread flag.  COLOR_RGBA2RGB is the conversion code with the same
        # numeric value (1), so the call behaves exactly as before while
        # naming what it does.
        image = cv2.cvtColor(self.img, cv2.COLOR_RGBA2RGB)
        image = self.img_augmentation(image, is_rotate=True)
        plt.imshow(image)
        plt.show()
        print(image.shape)
        return image
iscolor = True
myreader = MyReader(iscolor=iscolor) # instantiate the class
img_0 = imgplt.imread('C:\\Users\\Administrator\\Desktop\\梅花.jpg')
myreader.train_reader(img=img_0) # call a method of the class
运行完后,图片显示如下:
3. 注意:用 cv2.imread() 读取图片时,文件路径及文件名最好全部使用英文字符,否则会报错!
# Path with a non-ASCII file name; the article reports that reading it with
# cv2.imread() leads to an error.
img_0 = cv2.imread('C:\\Users\\Administrator\\Desktop\\梅花.jpg')
把文件名改为英文后,除了颜色与原图不一样之外,图像基本可以正常显示。
# Same read with an ASCII-only file name; the article reports that the image
# then displays, although its colours differ from the original.
img_0 = cv2.imread('C:\\Users\\Administrator\\Desktop\\meihua.jpg')