Code Example
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from IPython.display import display

# Load the pretrained CLIP model and its matching processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Open the example image with PIL and render it in the notebook
# (displaying the PIL image directly avoids shadowing PIL's Image
# with IPython.display.Image, as the original two imports did)
image = Image.open("data_examples/truck.jpg")
display(image)

# Candidate labels for zero-shot classification
cls_list = ["dog", "woman", "man", "car", "truck",
            "a black truck", "bird", "a white truck", "black cat"]

# Tokenize the labels and preprocess the image in a single call
inputs = processor(text=cls_list, images=image,
                   return_tensors="pt", padding=True)

outputs = model(**inputs)
print(outputs.keys())

# Image-text similarity scores, shape (num_images, num_labels)
logits_per_image = outputs.logits_per_image
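To turn the similarity logits into a prediction, apply a softmax across the candidate labels; a minimal sketch continuing the example above (the label list and variable names come from the code just shown):

# Convert similarity logits into probabilities over the candidate labels
probs = logits_per_image.softmax(dim=1)

# Report the best-matching label and its probability
best = probs.argmax(dim=1).item()
print(f"Predicted label: {cls_list[best]} (p={probs[0, best]:.3f})")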