import gc
import os
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Batch-caption every image in `path` with a local BLIP checkpoint and write
# one caption per image to a same-named .txt file in `out_path`.
processor = BlipProcessor.from_pretrained("./checkpoints/Salesforceblip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("./checkpoints/Salesforceblip-image-captioning-large")

# img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
path = r"C:\Users\pc\Desktop\sd_train\images"
out_path = r"C:\Users\pc\Desktop\sd_train\labels"

# Conditional-captioning prefix: BLIP completes this prompt for each image.
# Loop-invariant, so defined once outside the loop.
text = "a gdgzcj style photography of"

for i in os.listdir(path):
    # splitext handles any extension length (.jpg, .jpeg, .webp, ...);
    # the original `i[:-4]` silently mangled names with non-3-char extensions.
    stem, _ext = os.path.splitext(i)
    txt_name = os.path.join(out_path, stem + ".txt")
    image = os.path.join(path, i)

    # Skip directory entries that are not openable images instead of crashing.
    try:
        # `with` ensures the file handle is closed even if inference fails.
        with Image.open(image) as img:
            raw_image = img.convert('RGB')
    except (OSError, Image.UnidentifiedImageError):
        print(f"skipping non-image file: {image}")
        continue

    # conditional image captioning
    inputs = processor(raw_image, text, return_tensors="pt")
    out = model.generate(**inputs)
    txt_prompts = processor.decode(out[0], skip_special_tokens=True)
    print(txt_prompts)

    # unconditional image captioning
    # inputs = processor(raw_image, return_tensors="pt")
    # out = model.generate(**inputs)
    # txt_no_prompts = processor.decode(out[0], skip_special_tokens=True)
    # print(txt_no_prompts)
    # print(txt_name)

    with open(txt_name, 'w', encoding='utf-8') as file:
        file.write(txt_prompts)

    # Free per-image tensors/PIL objects between iterations.
    gc.collect()
# Generate image captions (descriptions) using BLIP.
# (Original blog note — latest recommended article published 2024-04-08 13:14:40)