While testing I found that in my training code the images were read without converting the color channels (cv2 reads BGR, not RGB). This note records the issue together with an example of transform-based data preprocessing.
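To see why the conversion matters, here is a minimal, self-contained sketch (the red.png file is created on the fly just for illustration): cv2.imread returns pixel data in BGR order, while PIL returns RGB, so without cv2.cvtColor the red and blue channels end up swapped.

import cv2
import numpy as np
from PIL import Image

# Write a pure-red 4x4 PNG (lossless, so both readers decode identical pixel values)
Image.new('RGB', (4, 4), (255, 0, 0)).save('red.png')

bgr = cv2.imread('red.png')                # cv2 returns channels in B, G, R order
rgb_pil = np.array(Image.open('red.png'))  # PIL returns channels in R, G, B order

print(bgr[0, 0])      # [  0   0 255] -> red lands in the last channel
print(rgb_pil[0, 0])  # [255   0   0]
print(np.array_equal(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), rgb_pil))  # True after conversion

The full example script follows.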
# -*- coding: utf-8 -*-
"""
@desc: a sample of transform-based data preprocessing
References:
https://blog.csdn.net/yanzhiwen2/article/details/123870111
https://zhuanlan.zhihu.com/p/482679183
@date: 2023-11-28 15:25:48
@software: vscode
"""
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.datasets.folder import default_loader
import torchvision.transforms.functional as TF
import random
from typing import Sequence
img_path = '/workspace/code/multi_label_image_classification/input/01000007_20200324123301_corp_1.jpg'
### --------- Choose ONE of the following three ways to read the image ---------
# '''Read with cv2 -> type <class 'numpy.ndarray'>; cv2 loads images in BGR order, so convert to RGB'''
# x = cv2.imread(img_path)
# x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)  # without this the red and blue channels stay swapped
# print("input img:", x.shape, type(x))
# '''Read with torchvision.datasets.folder.default_loader -> type <class 'PIL.Image.Image'>'''
# x = default_loader(img_path)
# print("input img:", x.size, type(x))
# '''Read with PIL -> type <class 'PIL.Image.Image'>'''
# Some images are read as 4-channel RGBA, where A is the alpha (transparency) channel; it is not
# needed when training a deep-learning model, so convert('RGB') drops it
x = Image.open(img_path).convert('RGB')
print("input img:", x.size, type(x))  # PIL .size is (width, height)
'''Pick the rotation angle from a discrete set, e.g. choose one of 30, 90, 120, 150, 240 each time'''
class MyRotateTransform:
def __init__(self, angles: Sequence[int]):
self.angles = angles
def __call__(self, x):
angle = random.choice(self.angles)
return TF.rotate(x, angle)
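# # Optional sanity check of MyRotateTransform on its own, using the PIL image `x` read above;
# # each call returns a copy of the image rotated by one of the listed angles:
# rotated = MyRotateTransform([90, 180, 270])(x)
# print("rotated img:", rotated.size, type(rotated))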
#### Test the transform pipeline used in my own training
### A detailed reference on the 22 transforms methods: https://blog.csdn.net/qq_38410428/article/details/94719553
transform = transforms.Compose([
    # '''Images read with cv2 must be converted to PIL format with ToPILImage(); but if the cv2
    # image was not converted from BGR to RGB first, the colors of the final result will be wrong'''
    # transforms.ToPILImage(),  # uncomment if the input is a numpy array (cv2)
    # transforms.RandomResizedCrop(90),  # random crop, then resize to 90x90
    # transforms.ColorJitter(0.05, 0.05, 0.05),  # randomly jitter brightness, contrast and saturation
    # transforms.RandomRotation(30),  # rotate by a random angle within ±30 degrees
    # transforms.RandomRotation(180, expand=True),
    # transforms.RandomGrayscale(p=0.5),  # randomly convert the image to grayscale (p=0.5)
    # transforms.RandomHorizontalFlip(p=1),  # horizontal flip
    # transforms.RandomVerticalFlip(p=1),  # vertical flip
    transforms.Resize((800, 800)),  # resize
    # transforms.RandomRotation((90, 180)),
    MyRotateTransform([90, 180, 270]),
    transforms.ToTensor(),  # convert to a Tensor and scale pixel values to [0, 1]
    # '''Normalization; the mean/std values below are the ImageNet statistics,
    # or compute the mean/std of your own dataset instead'''
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = transform(x)  # apply the transform pipeline
print("after transform: ", img.size())
plt.figure(figsize=(8,8))
img = img.numpy().transpose((1, 2, 0))  # (C, H, W) -> (H, W, C) for matplotlib
print("shape: ", img.shape)
img = np.clip(img, 0, 1)  # keep values in [0, 1] for imshow (matters if Normalize is enabled)
plt.imshow(img)
plt.axis('on')  # set to 'off' to hide the axes
plt.title('image')  # figure title
plt.savefig('/workspace/code/multi_label_image_classification/input/savefig_example.jpg')
plt.show()
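For completeness, here is a minimal sketch of how a pipeline like this is typically plugged into training (the train_dir path, batch size and worker count are illustrative assumptions, and MyRotateTransform is the class defined above). torchvision's ImageFolder reads images with default_loader, i.e. PIL in RGB order, so neither ToPILImage nor a BGR-to-RGB conversion is needed there.

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

train_dir = '/path/to/train'  # hypothetical layout: train_dir/<class_name>/*.jpg

train_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    MyRotateTransform([90, 180, 270]),   # the custom discrete-angle rotation defined above
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ImageFolder loads images with default_loader (PIL, already RGB),
# so the PIL-based transform pipeline can be used as-is.
train_set = datasets.ImageFolder(train_dir, transform=train_transform)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

for imgs, labels in train_loader:
    print(imgs.shape, labels.shape)  # e.g. torch.Size([16, 3, 800, 800]) torch.Size([16])
    break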