PVT image binary-classification inference v1.0

from PIL import Image
import cv2
import numpy as np
import onnxruntime as ort
import torchvision.transforms as transforms


# Default normalization constants
# These are the per-channel RGB mean and standard deviation computed over the
# ImageNet dataset; they are the standard choice for models pretrained on ImageNet.
IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406]
IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225]
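
For reference, `transforms.Normalize` standardizes each channel as `(x - mean) / std`. A minimal NumPy sketch of the same arithmetic (the input array here is random, purely for illustration):

```python
import numpy as np

mean = np.array(IMAGENET_DEFAULT_MEAN, dtype=np.float32).reshape(3, 1, 1)
std = np.array(IMAGENET_DEFAULT_STD, dtype=np.float32).reshape(3, 1, 1)

# A fake CHW image in [0, 1], the layout and range ToTensor() produces
x = np.random.rand(3, 224, 224).astype(np.float32)

# Per-channel standardization, identical to transforms.Normalize
x_norm = (x - mean) / std
```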

def build_transform(is_train, args):
    resize_im = args.input_size > 32
    if is_train:
        # this should always dispatch to transforms_imagenet_train
        transform = transforms.Compose([
            transforms.RandomResizedCrop(args.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
        ])
        if not resize_im:
            # replace RandomResizedCropAndInterpolation with
            # RandomCrop
            transform.transforms[0] = transforms.RandomCrop(
                args.input_size, padding=4)
        return transform

    t = []
    if resize_im:
        size = int((256 / 224) * args.input_size)
        t.append(
            transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC),  # to maintain same ratio w.r.t. 224 images
        )
        t.append(transforms.CenterCrop(args.input_size))  # crop to input_size

    t.append(transforms.ToTensor())
    t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD))
    return transforms.Compose(t)
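
A quick usage sketch of the eval-mode pipeline built above (`example.jpg` is a placeholder path):

```python
from PIL import Image

class Args:
    input_size = 224

eval_transform = build_transform(is_train=False, args=Args())
tensor = eval_transform(Image.open('example.jpg').convert('RGB'))
print(tensor.shape)  # torch.Size([3, 224, 224]): resized to 256, center-cropped, normalized
```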


def preprocess_image(image_path, is_train=False, args=None):
    """
    Load an image and preprocess it into a (1, 3, H, W) float32 NumPy array
    ready for ONNX Runtime. The crop size is taken from args.input_size.
    """
    # Load the image (cv2 reads BGR and returns None if the path is invalid)
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Convert to RGB, the channel order torchvision transforms expect
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Convert the NumPy array to a PIL Image; torchvision transforms
    # expect a PIL Image, not an ndarray (see [BUG1] below)
    image = Image.fromarray(image)

    # Variant A: return a NumPy array (what onnxruntime expects)
    # Apply transformations
    transform = build_transform(is_train, args)
    image = transform(image)

    # Convert to numpy array
    image = np.array(image)

    # Add batch dimension
    image = np.expand_dims(image, axis=0)

    return image

    # Variant B: return a PyTorch tensor (kept for reference;
    # onnxruntime session.run expects NumPy input)
    # # Apply transformations
    # transform = build_transform(is_train, args)
    # image = transform(image)

    # # Add batch dimension
    # image = image.unsqueeze(0)

    # return image


def run_inference(onnx_model_path, image_path, is_train=False, args=None):
    """
    Runs inference on the input image using the ONNX model.
    """
    # Load the ONNX model (CPU provider here; swap in 'CUDAExecutionProvider' on GPU builds)
    session = ort.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider'])

    # Get the input name
    input_name = session.get_inputs()[0].name

    # Preprocess the image
    preprocessed_image = preprocess_image(image_path, is_train=is_train, args=args)

    # Run inference
    outputs = session.run(None, {input_name: preprocessed_image})

    # Get the predicted class index from the logits
    predicted_class = int(np.argmax(outputs[0]))

    return predicted_class
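
`run_inference` returns only the argmax index. If a confidence score is also wanted, a softmax over the logits can be added; a minimal sketch, assuming `outputs[0]` has shape `(1, num_classes)` (typical for classification exports, though it depends on how the model was converted):

```python
def logits_to_probs(logits):
    """Numerically stable softmax over the last (class) axis."""
    z = logits - np.max(logits, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=-1, keepdims=True)

# Inside run_inference, after session.run(...):
# probs = logits_to_probs(outputs[0])            # e.g. shape (1, 2) for binary classification
# predicted_class = int(np.argmax(probs, axis=-1)[0])
# confidence = float(probs[0, predicted_class])
```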

if __name__ == "__main__":
    # Path to the ONNX model
    onnx_model_path = 'checkpoints/pvt_v2_b5/checkpoint_0.onnx'
    # Path to the input image
    image_path = '/home/nvidia/aigc/classify/PVT/classification/AIGC/val/aigc_0/0002.png'
    
    # Define arguments
    args = type('', (), {})()  # Empty class for arguments
    args.input_size = 224  # Input size of the model
    args.color_jitter = 0.4  # Color jitter factor (train-time only; unused at inference)
    args.aa = 'rand'  # AutoAugment policy (train-time only; unused at inference)
    args.train_interpolation = 'bicubic'  # Train-time interpolation method (unused at inference)
    args.reprob = 0.25  # Random Erasing probability (train-time only; unused at inference)
    args.remode = 'pixel'  # Random Erasing mode: per-pixel random values
    args.recount = 1  # Number of Random Erasing regions per image
    args.is_train = False  # Inference mode

    # Run inference
    prediction = run_inference(onnx_model_path, image_path, is_train=False, args=args)

    # Print the prediction
    print(f"The predicted class is: {prediction}")


"""
[BUG1]
  File "infer.py", line 101, in <module>
    prediction = run_inference(onnx_model_path, image_path, is_train=False, args=args)
  File "infer.py", line 74, in run_inference
    preprocessed_image = preprocess_image(image_path, is_train=is_train, args=args)
  File "infer.py", line 52, in preprocess_image
    image = transform(image)
报错:    raise TypeError(f"img should be PIL Image. Got {type(img)}")
TypeError: img should be PIL Image. Got <class 'numpy.ndarray'>

[debug]
问题在于 preprocess_image 函数中的图像数据类型不匹配。
torchvision.transforms 中的变换函数期望输入的是一个 PIL Image 对象,而您的代码中使用 cv2.imread 加载的图像是一个 NumPy 数组。

为了修复这个问题,您需要将 NumPy 数组转换为 PIL Image 对象,然后再应用变换。您可以使用 PIL.Image.fromarray 方法来实现这一转换。

"""