阿里云部署 UI-TARS 模型

最新推荐文章于 2025-04-08 10:01:19 发布

li128ve980

最新推荐文章于 2025-04-08 10:01:19 发布

阅读量795

点赞数 3

文章标签：阿里云、云计算

本文链接：https://blog.csdn.net/li128ve980/article/details/145467778

版权

1、快捷部署

魔搭社区

2、访问测试

import os
import json
import base64
import io
import openai
import re
import math
from PIL import Image, ImageDraw

def encode_image(image):
    """Serialize an image as PNG and return it base64-encoded.

    Args:
        image: An object exposing ``save(fp, format=...)``, such as a
            PIL image.

    Returns:
        The PNG bytes encoded as a base64 ``str``.
    """
    with io.BytesIO() as buffer:
        # Write the PNG into memory rather than to disk.
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")

def resize_image(image):
    """Rescale an image to a fixed pixel budget, keeping its aspect ratio.

    The image is always rescaled so that its area is approximately the
    chosen budget: images larger than ``6000 * 28 * 28`` pixels are scaled
    to about ``2700 * 28 * 28`` pixels, and every other image is scaled to
    about ``1340 * 28 * 28`` pixels. Note that this means images smaller
    than the budget are enlarged, not left untouched.

    Args:
        image: An object exposing ``width``, ``height`` and
            ``resize((width, height))``, such as a PIL image.

    Returns:
        Whatever ``image.resize`` returns for the computed size.

    Raises:
        ZeroDivisionError: If the image has zero width or height.
    """
    # Budgets are written as multiples of 28 * 28 pixels
    # (presumably the model's visual patch size -- TODO confirm).
    unit_area = 28 * 28
    large_image_threshold = 6000 * unit_area

    area = image.width * image.height
    if area > large_image_threshold:
        target_area = 2700 * unit_area
    else:
        target_area = 1340 * unit_area

    # Scaling both sides by sqrt(target / area) scales the area by target / area.
    resize_factor = math.sqrt(target_area / area)

    # int() truncates, so an extremely thin image could end up with a side
    # of 0 pixels, which resize() cannot handle; keep each side at least 1.
    width = max(1, int(image.width * resize_factor))
    height = max(1, int(image.height * resize_factor))
    return image.resize((width, height))

def draw_coordinates(image, coordinates):
    """Mark a detected point on the image with a red ring and a red dot.

    The image is drawn on in place and the same object is returned.

    Args:
        image: The PIL image to annotate.
        coordinates: An ``(x, y)`` pair. Each value is divided by 1000 and
            multiplied by the image width/height, i.e. it is treated as a
            position on a 0-1000 scale relative to the image size.

    Returns:
        The annotated ``image``.
    """
    canvas = ImageDraw.Draw(image)
    rel_x, rel_y = coordinates

    # Convert the 0-1000 relative position to pixel coordinates.
    center_x = round(rel_x / 1000 * image.width)
    center_y = round(rel_y / 1000 * image.height)

    # Ring size scales with the shorter side so it stays visible.
    ring = min(image.width, image.height) // 15
    ring_box = (center_x - ring, center_y - ring, center_x + ring, center_y + ring)
    dot_box = (center_x - 2, center_y - 2, center_x + 2, center_y + 2)

    canvas.ellipse(ring_box, outline='red', width=2)
    canvas.ellipse(dot_box, fill='red')
    return image

def _parse_coordinates(output_text):
    """Return the first ``(x, y)`` integer pair found in the model's reply.

    Whitespace around the numbers is tolerated, so both ``(123,456)`` and
    ``(123, 456)`` are accepted.

    Raises:
        ValueError: If the reply is empty or contains no such pair.
    """
    match = re.search(r"\(\s*(\d+)\s*,\s*(\d+)\s*\)", output_text or "")
    if match is None:
        raise ValueError("No valid coordinates found in model response.")
    return int(match.group(1)), int(match.group(2))


def send_request(image, query):
    """Ask the deployed model where a described UI element is located.

    The endpoint and token are read from the ``OPENAI_API_BASE`` and
    ``OPENAI_API_KEY`` environment variables and assigned to the module-level
    ``openai.api_base`` / ``openai.api_key`` settings. ``OPENAI_API_BASE``
    should be the access address of the deployed service followed by ``/v1``.

    Args:
        image: The screenshot to analyse (a PIL image). It is resized with
            ``resize_image`` and sent as a base64-encoded PNG data URL.
        query: Text describing the element to locate; it is appended to a
            fixed instruction asking for a single box coordinate.

    Returns:
        A tuple ``(x, y)`` of ints parsed from the first ``(x,y)`` pair in
        the model's reply.

    Raises:
        ValueError: If either environment variable is missing or empty, or
            if the reply contains no coordinate pair.
    """
    # Endpoint and credentials come from the environment so that the check
    # below is meaningful and no secret has to be written into the source.
    api_base = os.environ.get("OPENAI_API_BASE")
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_base or not api_key:
        raise ValueError("Missing OPENAI_API_BASE or OPENAI_API_KEY in environment variables.")
    openai.api_base = api_base
    openai.api_key = api_key
    model_name = "UI-TARS-7B-DPO"  # Name of the deployed model

    # Shrink/scale the image to the pixel budget, then encode it for transport.
    image = resize_image(image)
    base64_image = encode_image(image)

    # Build the chat payload: the image first, then the instruction + query.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                {"type": "text", "text": "Output only the coordinate of one box in your response. " + query},
            ],
        }
    ]

    response = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=1.0,
        top_p=0.7,
        max_tokens=128,
        frequency_penalty=1,
    )

    output_text = response["choices"][0]["message"]["content"]
    return _parse_coordinates(output_text)

# Manual test / demo
if __name__ == "__main__":
    # Replace with the path to your own test image.
    test_image = Image.open(r"your image path")
    # Description of the UI element the model should locate.
    test_query = "报名方式下拉按钮"

    coordinates = send_request(test_image, test_query)
    print(f"Detected coordinates: {coordinates}")

    # Mark the detected point on the opened image and display it.
    draw_coordinates(test_image, coordinates).show()