使用blip2实现批量以图生文
首先Blip2的环境搭建
参考的是这篇博客:<https://blog.csdn.net/qq_44442727/article/details/135016054>
博主写得太好了,点赞!但是博主没有写批量生成的部分,所以我在这里做一下笔记~
环境搭建补充
我运行时遇到了模型加载错误,后来发现作者更新的模型文件也需要下载。请务必把全部文件都下载下来,不要嫌文件太大。
批量以图生文代码
import os
# Select which GPU this process may use (here: the device with index 2);
# change the index to match your machine. If no GPU ends up being available
# the script falls back to the CPU, which is extremely slow.
# NOTE: this assignment is deliberately placed before `import torch` so the
# setting is already in the environment when CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import pandas as pd
import torch
import requests
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from lavis.models import load_model_and_preprocess
from PIL import Image
# Use the GPU when one is visible to this process; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is meant for GPU inference. On the CPU fallback keep
# float32, because float16 is poorly supported by CPU kernels.
dtype = torch.float16 if device == "cuda" else torch.float32

# "./projects/blip2/" is the folder that holds ALL of the downloaded model
# files (weights, config, tokenizer/processor files).
processor = Blip2Processor.from_pretrained("./projects/blip2/")
model = Blip2ForConditionalGeneration.from_pretrained(
    "./projects/blip2/", torch_dtype=dtype
)
model.to(device)

# Folder containing the images to caption.
dataset_folder = './image/RGB/'
# os.listdir returns entries in arbitrary order; sort them so the output rows
# are reproducible from run to run.
image_files = sorted(os.listdir(dataset_folder))
# Accumulates one [file name, caption] row per processed image.
data = []

# Batch generation: caption every .jpg/.png image in the dataset folder.
for image_file in image_files:
    # Compare case-insensitively so files such as "photo.JPG" are not
    # silently skipped.
    if not image_file.lower().endswith(('.jpg', '.png')):
        continue

    image_path = os.path.join(dataset_folder, image_file)
    # The context manager closes the underlying file handle right away;
    # convert() returns an independent in-memory RGB copy.
    with Image.open(image_path) as img:
        image = img.convert('RGB')

    # The input tensors must live on the same device and use the same dtype
    # as the model weights.
    inputs = processor(images=image, return_tensors="pt").to(device, dtype)
    generated_ids = model.generate(**inputs)
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0].strip()

    # Print each caption so the quality can be checked while the job runs.
    print(generated_text)
    data.append([image_file, generated_text])
# Store the results in an Excel workbook: the first column holds the image
# file name, the second column holds the generated caption.
df = pd.DataFrame(data, columns=['Image File', 'Generated Text'])
excel_file = './excel/file.xlsx'
# to_excel does not create missing parent folders; create the output folder
# first so a finished captioning run is not lost to a missing directory.
os.makedirs(os.path.dirname(excel_file), exist_ok=True)
df.to_excel(excel_file, index=False)
实验结果
跑了它的demo,效果真的不错!