本小项目使用 GPT-4 视觉模型(gpt-4-vision-preview)批量识别由多张关键帧截图拼接而成的图片,根据提示词分析其所属行业和宣传的产品,并把识别结果作为前缀加到图片文件名上
1、新建一个 recognition_project 目录,把待识别的 .jpg 图片放到其中的 default 子目录下,然后把脚本 recognition_image.py 放到 recognition_project 目录下
2、配置gpt的key
api_key是openai的key
base_url 是 OpenAI 官方接口地址或中转站地址(只填域名部分,脚本会自动拼接 /v1/chat/completions);如果你购买的是中转站服务,填写中转站的地址即可
3、执行脚本即可
recognition_image.py
import base64
import requests
import datetime
import os
import time
from tqdm import tqdm
def encode_image(image_path) -> str:
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode; the base64 bytes are decoded to ``str``
    so the result can be embedded directly in a JSON payload or data URL.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def recognition_image(image_file, base_url, api_key):
    """Send one image to the chat-completions endpoint and return the reply.

    Args:
        image_file: a two-item sequence ``[image_path, file_name]``; the path
            is read and base64-encoded, the file name is quoted in the prompt.
        base_url: scheme and host of the API (a trailing ``/`` is tolerated);
            ``/v1/chat/completions`` is appended to it.
        api_key: bearer token sent in the ``Authorization`` header.

    Returns:
        The text of the first choice when the server answers with HTTP 200;
        otherwise the status and body are printed and the fixed marker
        ``"识别失败"`` is returned.

    Raises:
        OSError: if the image file cannot be read.
        requests.RequestException: on connection errors or when the request
            exceeds the timeout.
        KeyError / IndexError: if a 200 response lacks the expected
            ``choices[0].message.content`` structure.
    """
    image_path = image_file[0]
    file_name = image_file[1]

    # The image travels inline as a base64 data URL.
    base64_image = encode_image(image_path)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        # NOTE(review): the model name is hard-coded; confirm the endpoint
        # behind base_url still serves it.
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"这是一张广告图片,名称是{file_name},该图片包含多张关键帧截图,根据你的理解,告诉我它的行业,宣传的产品,"
                                f"如果你不能识别,直接告诉我不能识别就行。"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    # Without a timeout a stalled connection would block the whole batch
    # forever; stripping the slash avoids "//v1/..." when base_url ends in "/".
    response = requests.post(
        f"{base_url.rstrip('/')}/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )

    if response.status_code == 200:
        response_data = response.json()
        return response_data['choices'][0]['message']['content']

    # Non-200: dump what the server said so the failure can be diagnosed.
    print(response.status_code)
    print(response.text)
    print(f"{file_name}: 请求失败")
    return "识别失败"
def check_file_existence(file_path) -> bool:
    """Return True if ``file_path`` exists on disk (file or directory)."""
    return os.path.exists(file_path)
def list_jpg_files(directory):
    """Recursively collect every ``.jpg`` file under ``directory``.

    The extension check is case-insensitive. Files are returned in the order
    ``os.walk`` yields them.

    Returns:
        A list of ``[absolute_path, file_name]`` pairs; empty when the
        directory does not exist or contains no matching files.
    """
    jpg_files = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith('.jpg'):
                absolute_path = os.path.abspath(os.path.join(root, file))
                jpg_files.append([absolute_path, file])
    return jpg_files
def process_recognition(file_array, base_url, api_key):
    """Ask the model about one image and condense its reply into a label.

    A reply is accepted only when it consists of exactly four non-blank rows
    and every row contains a ``-``. For each row ``/`` is replaced by ``&``
    and the text between the first and second ``-`` is kept; the four values
    are joined with ``-`` and the phrases "无法识别", "无法辨认" and "无法确认"
    are removed.

    Args:
        file_array: ``[image_path, file_name]`` pair passed through to
            ``recognition_image``.
        base_url: API base URL, passed through.
        api_key: API key, passed through.

    Returns:
        The condensed label, or ``"fail"`` when no attempt produced a reply
        in the expected format.
    """
    max_retries = 2
    for attempt in range(max_retries):
        text = recognition_image(file_array, base_url, api_key)

        # splitlines() plus the blank filter tolerates CRLF endings and a
        # trailing newline, which would otherwise break the row count.
        rows = [line for line in text.splitlines() if line.strip()]
        if len(rows) == 4:
            split_rows = [row.replace('/', '&').split("-") for row in rows]
            # A row without "-" means the reply is malformed: retry instead
            # of raising IndexError.
            if all(len(parts) > 1 for parts in split_rows):
                result = "-".join(parts[1] for parts in split_rows)
                for marker in ("无法识别", "无法辨认", "无法确认"):
                    result = result.replace(marker, "")
                return result

        # Pause only when another attempt will follow.
        if attempt < max_retries - 1:
            time.sleep(1)

    return "fail"
def recognition_main(file_array, base_url, api_key):
    """Recognise one image and rename it to ``<label>_<original name>``.

    The label comes from ``process_recognition`` (``"fail"`` when recognition
    did not succeed). The file stays in its directory; it is renamed only if
    the source path still exists at that point.

    Args:
        file_array: ``[absolute_image_path, file_name]`` pair.
        base_url: API base URL, passed through.
        api_key: API key, passed through.

    Raises:
        OSError: if the rename itself fails.
    """
    text = process_recognition(file_array, base_url, api_key)

    # The label is model output and ends up in a file name: map characters
    # that are path separators or not allowed in Windows file names to "&"
    # and drop stray control characters, so the rename cannot fail on them
    # or land in a different directory.
    replacements = {ch: "&" for ch in '\\/:*?"<>|'}
    replacements.update({ch: None for ch in "\r\n\t"})
    safe_text = text.translate(str.maketrans(replacements))

    source_path = file_array[0]
    new_name = safe_text + '_' + file_array[1]
    new_image = os.path.join(os.path.dirname(source_path), new_name)

    if check_file_existence(source_path):
        os.rename(source_path, new_image)
def main(directory, base_url, api_key):
    """Recognise and rename every ``.jpg`` under ``directory``.

    Files are processed one by one with a progress bar. An exception on one
    file is printed and the loop moves on to the next file. The total elapsed
    time is printed at the end.

    Args:
        directory: root directory searched recursively for images.
        base_url: API base URL.
        api_key: API key.
    """
    start_time = datetime.datetime.now()
    jpg_files = list_jpg_files(directory)
    for file_array in tqdm(jpg_files):
        try:
            recognition_main(file_array, base_url, api_key)
            # Short pause between successful requests.
            time.sleep(1)
        except Exception as e:
            # Best-effort batch: report which file failed and keep going.
            print('错误:', file_array[0], e)
    print(datetime.datetime.now() - start_time)
if __name__ == '__main__':
    # Placeholders: fill in your own key and endpoint before running.
    api_key = "sk-xxxxx"
    # Scheme and host only; "/v1/chat/completions" is appended by the script.
    base_url = 'https://xxx.cn'
    # Images are looked up recursively under this directory.
    directory = './default'
    main(directory, base_url, api_key)