"""Classify a single image with a CSWin-Large model and keep the results.

The script builds the model, loads EMA weights from a local checkpoint,
preprocesses one frame from the ``UCF_101_frame`` directory and runs a
forward pass. Results are left in module-level names:

* ``out``      -- softmax probabilities for the one-image batch.
* ``pred_unk`` -- highest class probability of each batch row.
* ``max``      -- class index of that highest probability.
"""
import torch
import torch.nn.functional as F
from PIL import Image
from timm import create_model
from torchvision import transforms

# Not referenced directly below; presumably imported for its side effect of
# registering the CSWin architectures with timm -- TODO confirm.
import cswin  # noqa: F401

model_ft = create_model(
    'CSWin_144_24322_large_224',
    pretrained=False,
    num_classes=21842,
)
# NOTE(security): torch.load unpickles the file, so the checkpoint must come
# from a trusted source. Only the "state_dict_ema" entry is used.
model_ft.load_state_dict(
    torch.load('cswin_large_22k_224.pth', map_location='cpu')["state_dict_ema"]
)
model_ft.eval()

# Resize to exactly 256x256 (aspect ratio is not preserved), take the central
# 224x224 crop, convert to a float tensor and normalise each channel with the
# commonly used ImageNet mean/std.
trans = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# The context manager releases the file handle as soon as the RGB copy exists.
with Image.open(
    'UCF_101_frame/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01/00002.jpg'
) as frame:
    test = frame.convert('RGB')
test = trans(test)
# Add a leading batch axis so the model receives a batch of one image.
test = torch.unsqueeze(test, dim=0)

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    out = model_ft(test)
    # An explicit dim avoids the deprecated implicit-dimension behaviour;
    # dim=1 is the class axis of the (batch, classes) logits.
    out = F.softmax(out, dim=1)
    # One reduction yields both the top probability and its class index.
    # NOTE: `max` shadows the builtin; the name is kept so that any later
    # code relying on it keeps working.
    pred_unk, max = torch.max(out, dim=1)