AI学习:图片对话 -通义千问VL-plus

SDK调用

简单示例

from http import HTTPStatus
import dashscope


def simple_multimodal_conversation_call():
    """Send one image-plus-question turn to qwen-vl-plus and print the outcome.

    On HTTP 200 the whole response object is printed; on any other status
    the error code and the error message are printed on separate lines.
    """
    image_part = {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}
    question_part = {"text": "这是什么?"}
    request_messages = [
        {"role": "user", "content": [image_part, question_part]},
    ]

    result = dashscope.MultiModalConversation.call(
        model='qwen-vl-plus',
        messages=request_messages,
    )

    # Anything other than HTTPStatus.OK means the request failed; the
    # response then carries the failure details in `code` and `message`.
    if result.status_code != HTTPStatus.OK:
        print(result.code)  # The error code.
        print(result.message)  # The error message.
        return

    print(result)


# Run the single-round demo only when this snippet is executed as a script.
if __name__ == '__main__':
    simple_multimodal_conversation_call()
{"status_code": 200, "request_id": "18f0a61d-b6ca-9ba2-90e3-3c1fa541a0a4", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑看着狗狗。远处是海天一色的风景,夕阳洒下金色光芒给整个画面增添了一种温馨的感觉。"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 69}}

本地图片

from dashscope import MultiModalConversation


def call_with_local_file():
    """Ask qwen-vl-plus about an image referenced by a local file:// URL.

    Path formats noted by the sample:
       linux&mac file schema: file:///home/images/test.png
       windows file schema: file://D:/images/abc.png

    The response object is printed as-is; no status check is performed.
    """
    # Placeholder: replace with the absolute path of a real image.
    local_file_path = 'file://The_local_absolute_file_path'

    system_turn = {
        'role': 'system',
        'content': [{'text': 'You are a helpful assistant.'}],
    }
    user_turn = {
        'role': 'user',
        'content': [
            {'image': local_file_path},
            {'text': '图片里有什么东西?'},
        ],
    }

    result = MultiModalConversation.call(
        model='qwen-vl-plus',
        messages=[system_turn, user_turn],
    )
    print(result)


# Run the local-file demo only when this snippet is executed as a script.
if __name__ == '__main__':
    call_with_local_file()
{"status_code": 200, "request_id": "01cc5d3a-365c-9d34-9781-d17f303febc4", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一只蓝眼睛的白猫,它正坐在地毯上。"}]}}]}, "usage": {"input_tokens": 1257, "output_tokens": 16}}

多轮对话

from http import HTTPStatus
from dashscope import MultiModalConversation


def conversation_call():
    """Sample of multiple rounds of conversation.

    Round one sends an image plus a question; the assistant's reply is then
    appended to the history together with a follow-up question, and round two
    is sent with the full history. Each round prints the whole response on
    HTTP 200, or the error code and error message otherwise.

    If round one fails there is no assistant reply to carry forward, so the
    function returns right after printing the error instead of dereferencing
    the missing output.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"},
                {"text": "这是什么?"},
            ]
        }
    ]
    response = MultiModalConversation.call(model=MultiModalConversation.Models.qwen_vl_chat_v1,
                                           messages=messages)
    # The response status_code is HTTPStatus.OK indicate success,
    # otherwise indicate request is failed, you can get error code
    # and message from code and message.
    if response.status_code != HTTPStatus.OK:
        print(response.code)  # The error code.
        print(response.message)  # The error message.
        # Without a successful first round there is nothing to build the
        # second round on; stop here rather than crash on response.output.
        return
    print(response)

    reply = response.output.choices[0].message
    reply_content = reply.content
    # Some models return the reply content as a plain string, others as a
    # list of parts such as [{"text": ...}]. Only wrap the plain-string form
    # so a list-shaped reply is not nested inside another text part.
    if isinstance(reply_content, str):
        reply_content = [{'text': reply_content}]
    messages.append({'role': reply.role, 'content': reply_content})
    messages.append({"role": "user",
                     "content": [
                         {"text": "她们在干什么?", }
                     ]})

    response = MultiModalConversation.call(model='qwen-vl-plus',
                                           messages=messages)
    # Same success/failure reporting as for the first round.
    if response.status_code == HTTPStatus.OK:
        print(response)
    else:
        print(response.code)  # The error code.
        print(response.message)  # The error message.


# Run the multi-round demo only when this snippet is executed as a script.
if __name__ == '__main__':
    conversation_call()
{"status_code": 200, "request_id": "68f2f575-a351-99c6-96ba-daec51cb8908", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": "图中是一 名女子在沙滩上和狗玩耍,旁边的狗是一只拉布拉多犬,它们处于沙滩上。"}}]}, "usage": {"input_tokens": 285, "output_tokens": 25}}
{"status_code": 200, "request_id": "6bba05eb-4b53-9487-abbd-a68152e697c9", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "这名女子正在与她的宠物狗狗进行互动游戏,在海滩的背景下享受着美好的时光。"}]}}]}, "usage": {"input_tokens": 1316, "output_tokens": 19}}

流式输出


from dashscope import MultiModalConversation


def simple_multimodal_conversation_call():
    """Stream a single-turn image question to qwen-vl-plus.

    The call is made with stream=True and every item it yields is printed
    as soon as it is produced.
    """
    image_part = {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}
    question_part = {"text": "这是什么?"}
    request_messages = [
        {"role": "user", "content": [image_part, question_part]},
    ]

    stream = MultiModalConversation.call(
        model='qwen-vl-plus',
        messages=request_messages,
        stream=True,
    )
    # Print chunk by chunk so output appears incrementally rather than
    # after the whole stream has been consumed.
    for chunk in stream:
        print(chunk)


# Run the streaming demo only when this snippet is executed as a script.
if __name__ == '__main__':
    simple_multimodal_conversation_call()
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 3}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 11}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 19}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 27}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 35}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 43}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑看着狗狗。远处是海天一"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 51}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑看着狗狗。远处是海天一色的风景,夕阳洒下金色"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 59}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑看着狗狗。远处是海天一色的风景,夕阳洒下金色光芒给整个画面增添了一种温馨"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 67}}
{"status_code": 200, "request_id": "3c5da353-7332-941a-a197-f2be76fa82eb", "code": "", "message": "", "output": {"text": null, "finish_reason": null, "choices": [{"finish_reason": null, "message": {"role": "assistant", "content": [{"text": "图中是一名女子和她的狗在海滩上。这只金毛犬坐在沙滩上,抬起前爪与女子握手互动。 女子穿着格子衬衫、黑色裤子和白色鞋子,她面带微笑看着狗狗。远处是海天一色的风景,夕阳洒下金色光芒给整个画面增添了一种温馨的感觉。"}]}}]}, "usage": {"input_tokens": 1277, "output_tokens": 69}}

在代码方面,与普通的通义千问VL相比,只需要将 MultiModalConversation.call(model='qwen-vl-plus', messages=messages, stream=True) 中的 model 参数改为 'qwen-vl-plus'。

参数等参考通义千问VL文档

  • 6
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值