import torch
import clip
from PIL import Image
# Zero-shot image classification with CLIP: score one image against a list of
# text labels and print a probability for each label.

# Pick the GPU when one is available. The model and every input tensor must
# live on this same device, otherwise the forward pass fails with a device
# mismatch.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained model together with its matching image transform.
model, preprocess = clip.load('ViT-B/32', device=device)

# Load the image.
image = Image.open('日产.jpeg')

# Preprocess the image and add a leading batch dimension of size one.
image_input = preprocess(image).unsqueeze(0).to(device)

# Candidate class labels.
# NOTE(review): 'cann' looks like a typo — confirm the intended label.
class_labels = ['cat', 'dog', 'flower', 'food', 'car','cann']

# Tokenize the labels so they can be fed to the text encoder.
class_descriptions = clip.tokenize(class_labels).to(device)

# Inference only: run everything under no_grad so no autograd graph is built.
with torch.no_grad():
    # Image embedding on its own (kept available for later use).
    image_features = model.encode_image(image_input)
    # Similarity logits between the image and every label description.
    logits_per_image, logits_per_text = model(image_input, class_descriptions)

# Turn the per-label logits into probabilities for the single image.
probas = logits_per_image.softmax(dim=-1).cpu().numpy()

# Print the prediction for each label.
for class_label, proba in zip(class_labels, probas[0]):
    print(f"{class_label}: {proba}")
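# Installation notes: pay special attention to installing the three imported
# libraries correctly.
#   pip3 install Pillow
#   pip3 install torch
# Be especially careful with clip: do not install the wrong package (the
# unrelated PyPI package named `clip` does not provide `clip.load`).
#   pip3 install openai-clip
# Additional library that also needs to be installed:
#   pip3 install torchvision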