While testing I found that in my training code the images were read without converting the color channels (cv2 reads BGR, not RGB). This note records the issue together with an example of transform-based data preprocessing.
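To see why the conversion matters, here is a minimal, self-contained sketch (the red.png file is created on the fly just for illustration): cv2.imread returns pixel data in BGR order, while PIL returns RGB, so without cv2.cvtColor the red and blue channels end up swapped.

import cv2
import numpy as np
from PIL import Image

# Write a pure-red 4x4 PNG (lossless, so both readers decode identical pixel values)
Image.new('RGB', (4, 4), (255, 0, 0)).save('red.png')

bgr = cv2.imread('red.png')                # cv2 returns channels in B, G, R order
rgb_pil = np.array(Image.open('red.png'))  # PIL returns channels in R, G, B order

print(bgr[0, 0])      # [  0   0 255] -> red lands in the last channel
print(rgb_pil[0, 0])  # [255   0   0]
print(np.array_equal(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), rgb_pil))  # True after conversion

The full example script follows.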
# -*- coding: utf-8 -*-
"""
@desc: a sample of transform-based data preprocessing
References:
https://blog.csdn.net/yanzhiwen2/article/details/123870111
https://zhuanlan.zhihu.com/p/482679183
@date: 2023-11-28 15:25:48
@software: vscode
"""
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.datasets.folder import default_loader
import torchvision.transforms.functional as TF
import random
from typing import Sequence
img_path = '/workspace/code/multi_label_image_classification/input/01000007_20200324123301_corp_1.jpg'
### --------- Choose ONE of the following three ways to read the image ---------
# '''Read with cv2 -> type <class 'numpy.ndarray'>; cv2 loads images in BGR order, so convert to RGB'''
# x = cv2.imread(img_path)
# x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)  # without this the red and blue channels stay swapped
# print("input img:", x.shape, type(x))
# '''Read with torchvision.datasets.folder.default_loader -> type <class 'PIL.Image.Image'>'''
# x = default_loader(img_path)
# print("input img:", x.size, type(x))
# '''Read with PIL -> type <class 'PIL.Image.Image'>'''
# Some images are read as 4-channel RGBA, where A is the alpha (transparency) channel; it is not
# needed when training a deep-learning model, so convert('RGB') drops it
x = Image.open(img_path).convert('RGB')
print("input img:", x.size, type(x))  # PIL .size is (width, height)
'''Pick the rotation angle from a discrete set, e.g. choose one of 30, 90, 120, 150, 240 each time'''
class MyRotateTransform:
def __init__(self, angles: Sequence[int]):
self.angles = angles
def __call__(self, x):
angle = random.choice(self.angles)
return TF.rotate(x, angle)
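# # Optional sanity check of MyRotateTransform on its own, using the PIL image `x` read above;
# # each call returns a copy of the image rotated by one of the listed angles:
# rotated = MyRotateTransform([90, 180, 270])(x)
# print("rotated img:", rotated.size, type(rotated))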
#### Test the transform pipeline used in my own training
### A detailed reference on the 22 transforms methods: https://blog.csdn.net/qq_38410428/article/details/94719553
transform = transforms.Compose([
    # '''Images read with cv2 must be converted to PIL format with ToPILImage(); but if the cv2
    # image was not converted from BGR to RGB first, the colors of the final result will be wrong'''
    # transforms.ToPILImage(),  # uncomment if the input is a numpy array (cv2)
    # transforms.RandomResizedCrop(90),  # random crop, then resize to 90x90
    # transforms.ColorJitter(0.05, 0.05, 0.05),  # randomly jitter brightness, contrast and saturation
    # transforms.RandomRotation(30),  # rotate by a random angle within ±30 degrees
    # transforms.RandomRotation(180, expand=True),
    # transforms.RandomGrayscale(p=0.5),  # randomly convert the image to grayscale (p=0.5)
    # transforms.RandomHorizontalFlip(p=1),  # horizontal flip
    # transforms.RandomVerticalFlip(p=1),  # vertical flip
    transforms.Resize((800, 800)),  # resize
    # transforms.RandomRotation((90, 180)),
    MyRotateTransform([90, 180, 270]),
    transforms.ToTensor(),  # convert to a Tensor and scale pixel values to [0, 1]
    # '''Normalization; the mean/std values below are the ImageNet statistics,
    # or compute the mean/std of your own dataset instead'''
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = transform(x)  # apply the transform pipeline
print("after transform: ", img.size())
plt.figure(figsize=(8,8))
img = img.numpy().transpose((1, 2, 0))  # (C, H, W) -> (H, W, C) for matplotlib
print("shape: ", img.shape)
img = np.clip(img, 0, 1)  # keep values in [0, 1] for imshow (matters if Normalize is enabled)
plt.imshow(img)
plt.axis('on')  # set to 'off' to hide the axes
plt.title('image')  # figure title
plt.savefig('/workspace/code/multi_label_image_classification/input/savefig_example.jpg')
plt.show()
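For completeness, here is a minimal sketch of how a pipeline like this is typically plugged into training (the train_dir path, batch size and worker count are illustrative assumptions, and MyRotateTransform is the class defined above). torchvision's ImageFolder reads images with default_loader, i.e. PIL in RGB order, so neither ToPILImage nor a BGR-to-RGB conversion is needed there.

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

train_dir = '/path/to/train'  # hypothetical layout: train_dir/<class_name>/*.jpg

train_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    MyRotateTransform([90, 180, 270]),   # the custom discrete-angle rotation defined above
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ImageFolder loads images with default_loader (PIL, already RGB),
# so the PIL-based transform pipeline can be used as-is.
train_set = datasets.ImageFolder(train_dir, transform=train_transform)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

for imgs, labels in train_loader:
    print(imgs.shape, labels.shape)  # e.g. torch.Size([16, 3, 800, 800]) torch.Size([16])
    break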