DINOv2 基于自定义数据集进行图片分类预测

目标:获取图片的embedding

一、方法一:下载DINOv2模型

(前提是安装好pytorch)

1. pip安装 

pip install transformers -i https://mirror.baidu.com/pypi/simple

2. 下载dinov2提供的预训练模型,需要自行前往HuggingFace网站上查找dinov2模型进行下载。模型需要下载三个文件:

config.json

preprocessor_config.json

pytorch_model.bin

下载完成后将上述三个文件放置在本地一个名为dinov2_base的文件夹中即可。

from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import torch.nn as nn
import torch

# Select compute device (GPU if available, otherwise CPU).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the locally downloaded DINOv2 model
# (folder must contain config.json, preprocessor_config.json, pytorch_model.bin).
model_folder = './dinov2_base'
processor = AutoImageProcessor.from_pretrained(model_folder)
model = AutoModel.from_pretrained(model_folder).to(device)

# Extract image features.
image = Image.open('img.jpg')
with torch.no_grad():
    # Bug fix: the original referenced undefined names (image1, inputs1,
    # outputs1, image_features1); use the variables actually defined above.
    inputs = processor(images=image, return_tensors="pt").to(device)
    outputs = model(**inputs)
    image_features = outputs.last_hidden_state  # (1, num_tokens, hidden_dim)
    # Mean-pool over the token dimension to get one embedding per image.
    image_features = image_features.mean(dim=1)  # (1, hidden_dim)

二、方法二:安装dinov2包

参考:CSDN 博客《基于 dinoV2 分类模型修改》(github dinov2)

# Instantiate the DINOv2 backbone and a linear classification head.
from functools import partial
from dinov2.eval.linear import create_linear_input
from dinov2.eval.linear import LinearClassifier
from dinov2.eval.utils import ModelWithIntermediateLayers

# Load the small ViT-S/14 backbone from a local pretrained checkpoint.
model = dinov2_vits14(weights={'LVD142M':'./model/dinoV2/dinov2_vits14_pretrain.pth'})
autocast_ctx = partial(torch.cuda.amp.autocast, enabled=True, dtype=torch.float16)
# Bug fix: the original used `self.` at module level (NameError outside a
# class); this snippet runs as a plain script, so use module-level names.
feature_model = ModelWithIntermediateLayers(model, n_last_blocks=1, autocast_ctx=autocast_ctx).to(device)


# Instantiate the fully-connected classification layer.
embed_dim = model.embed_dim
# num_classes=100 corresponds to the number of categories you need to classify.
# The input dim is embed_dim*2 because LinearClassifier sees the mean-pooled
# patch tokens concatenated with the class token (use_avgpool=True).
classifier = LinearClassifier(embed_dim * 2, use_n_blocks=1, use_avgpool=True, num_classes=100).to(device)

# Freeze the backbone.
# Bug fix: the original iterated `model.feature_model.parameters()`, but the
# backbone has no `feature_model` attribute; freeze the wrapper's parameters.
for param in feature_model.parameters():
    param.requires_grad = False

# neck结构,在输出后添加卷积的过程。

def autopad(k, p=None):
    """Return the padding that yields 'same'-style output size.

    If an explicit padding `p` is given it is returned unchanged;
    otherwise half the kernel size is used (per element for a
    sequence of kernel sizes).
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [x // 2 for x in k]

class Conv(nn.Module):
    """1-D convolution -> BatchNorm -> ReLU block.

    Args:
        c1: input channels.
        c2: output channels.
        k: kernel size (int or sequence).
        s: stride.
        p: padding; None means 'same'-style (k // 2).
        g: convolution groups.
        act: kept for interface compatibility (activation is always ReLU).
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1,
                 act=True):
        super().__init__()
        # 'same' padding when p is None (inlined autopad logic).
        pad = p
        if pad is None:
            pad = k // 2 if isinstance(k, int) else [x // 2 for x in k]
        self.conv = nn.Conv1d(c1, c2, k, s, pad, groups=g, bias=False)
        self.bn = nn.BatchNorm1d(c2)
        self.act = nn.ReLU()

    def forward(self, x):
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)


class neck_dinov2(nn.Module):
    """Classification neck for DINOv2 intermediate features.

    Consumes the (patch_tokens, class_token) pair produced by
    ModelWithIntermediateLayers, refines the patch tokens with two
    Conv blocks, mean-pools them, concatenates the class token and
    classifies with a linear layer.

    Args:
        c0: number of patch tokens (channel dim seen by the 1-D convs).
        c1: backbone embedding dimension.
        nc: number of output classes.
        dropout: dropout probability applied after the conv stack.
    """

    def __init__(self, c0, c1, nc, dropout=0.5):
        super().__init__()
        # Bug fix: the original referenced the undefined name `Conv1d`
        # (NameError at construction); the intended module is the local
        # Conv block (nn.Conv1d + BatchNorm1d + ReLU) defined above.
        self.conv1 = Conv(c0, c0)
        self.conv2 = Conv(c0, c0)
        self.drop = nn.Dropout(p=dropout)

        # Head input is [mean(patch feats), class token] -> c1 * 2 dims.
        self.linear = nn.Linear(c1 * 2, nc)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        # x[0] is the (patch_tokens, class_token) pair; detach so gradients
        # never flow back into the (frozen) backbone. The original wrapped
        # each tensor in a single-element torch.cat, which is a no-op.
        feature, class_token = x[0]
        feature = feature.detach()
        class_token = class_token.detach()

        feature = self.drop(self.conv2(self.conv1(feature)))
        # Mean-pool patch tokens, then append the class token.
        x0 = torch.cat((torch.mean(feature, dim=1), class_token), dim=-1)

        return self.linear(x0)


class HubConf(nn.Module):
    """Multi-head classifier on top of a frozen DINOv2 backbone.

    Builds the backbone named by cfg.MODEL.NAME, wraps it with
    ModelWithIntermediateLayers, and attaches three neck_dinov2
    classification heads (nc1/nc2/nc3 classes).
    """

    def __init__(self,cfg,pretrain_choice = 'frozen'):
        super(HubConf, self).__init__()

        model_path = cfg.MODEL.PRETRAIN_PATH
        self.cfg = cfg
        # NOTE(review): eval() executes whatever string is in cfg.MODEL.NAME —
        # make sure the config comes from a trusted source.
        self.base = eval(cfg.MODEL.NAME)(weights={'LVD142M':model_path})
        self.in_planes = self.base.embed_dim

        # Number of 14x14 patches for the configured training resolution
        # (DINOv2 ViT uses patch size 14).
        self.consize = int((cfg.INPUT.SIZE_TRAIN[0]/14)*(cfg.INPUT.SIZE_TRAIN[1]/14))

        autocast_ctx = partial(torch.cuda.amp.autocast, enabled=True, dtype=torch.float16)
        self.feature_model = ModelWithIntermediateLayers(self.base, n_last_blocks=1, autocast_ctx=autocast_ctx)
        if pretrain_choice == 'frozen':
            # Freeze the backbone; only the heads are trained.
            for param in self.feature_model.parameters():
                param.requires_grad = False

        self.country_cls = neck_dinov2(self.consize, self.in_planes, cfg.MODEL.nc1, dropout=cfg.MODEL.DROPOUT)  # classification head 1
        self.cn_cls = neck_dinov2(self.consize,self.in_planes, cfg.MODEL.nc2, dropout=cfg.MODEL.DROPOUT)  # classification head 2
        self.ct_cls = neck_dinov2(self.consize,self.in_planes, cfg.MODEL.nc3, dropout=cfg.MODEL.DROPOUT)  # classification head 3


    def forward(self, x):
        """Run the frozen backbone once and score with all three heads."""

        global_feat = self.feature_model(x)  # ((bs, patch_h*patch_w, embed_dim), (bs, embed_dim)), e.g. ((1, (224/14)*(224/14), 384), (1, 384))
        country_score = self.country_cls(global_feat)
        cn_score = self.cn_cls(global_feat)
        ct_score = self.ct_cls(global_feat)

        return (country_score, cn_score,ct_score)


    def load_param(self, trained_path, device='cuda:0'):
        """Copy matching weights from a checkpoint file into this model.

        Keys missing from this model's state_dict are skipped (and logged),
        so partially-compatible checkpoints load without raising.
        """
        param_dict = torch.load(trained_path, map_location=device)
        for i in param_dict:
            #if 'classifier' in i:
            if i not in self.state_dict():
                print('not load param ', i)
                continue
            self.state_dict()[i].copy_(param_dict[i])


import torch
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import matplotlib
from dinov2.hub.backbones import dinov2_vitb14, dinov2_vitg14, dinov2_vitl14, dinov2_vits14


# Patch-grid geometry: the input is resized to (patch_h*14, patch_w*14) so the
# ViT-S/14 backbone produces patch_h*patch_w patch tokens of dim feat_dim.
patch_h = 50
patch_w = 100
feat_dim = 384  # ViT-S/14 embedding size

transform = T.Compose([
    T.GaussianBlur(9, sigma=(0.1, 2.0)),
    T.Resize((patch_h * 14, patch_w * 14)),
    T.CenterCrop((patch_h * 14, patch_w * 14)),
    T.ToTensor(),
    # ImageNet normalization statistics.
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# dinov2_vits14 = torch.hub.load('', 'dinov2_vits14', source='local').cuda()
# NOTE(review): source='local' with an empty repo path and a `weights` dict is
# unusual for torch.hub.load — verify against the dinov2 hubconf signature.
vits14 = torch.hub.load('', 'dinov2_vits14', weights={'LVD142M':'./model/dinoV2/dinov2_vits14_pretrain.pth'},source='local').cuda()

# Pre-allocated batch of 4; only slot 0 is actually filled below.
features = torch.zeros(4, patch_h * patch_w, feat_dim)
imgs_tensor = torch.zeros(4, 3, patch_h * 14, patch_w * 14).cuda()

img_path = f'1.jpg'
img = Image.open(img_path).convert('RGB')
imgs_tensor[0] = transform(img)[:3]  # [:3] drops any alpha channel
with torch.no_grad():
    features_dict = vits14.forward_features(imgs_tensor)
    # Per-patch token features, shape (4, patch_h*patch_w, feat_dim).
    features = features_dict['x_norm_patchtokens']
  • 3
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值