# 1. Currently selected model: multimodal input supporting one image,
#    with the image downloaded from the web.
from http import HTTPStatus

import dashscope
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role

# NOTE(review): key left empty here; set it before calling, preferably
# from an environment variable rather than hardcoding.
dashscope.api_key = ""


def simple_multimodal_conversation_call(img, question):
    """Send one image URL plus a text question to qwen-vl-plus and print the reply.

    Args:
        img: URL of the image — presumably a string; TODO confirm callers
            never pass non-string values (the old f-string wrapper coerced).
        question: text prompt to pair with the image.

    Side effects:
        On HTTP 200 prints the first reply's text; otherwise prints the
        error code and error message. Returns None either way.
    """
    messages = [
        {
            "role": "user",
            "content": [
                # The original wrapped these in f"{...}", a no-op for str input.
                {"image": img},
                {"text": question},
            ],
        }
    ]
    response = dashscope.MultiModalConversation.call(
        model='qwen-vl-plus',
        messages=messages,
    )
    if response.status_code == HTTPStatus.OK:
        print(response.output.choices[0]['message']['content'][0]['text'])
    else:
        print(response.code)
        print(response.message)


img = ''
text = ''
之前准备用 ChatGPT 的接口,但需要付费,所以先改用通义千问测试。
2. 曾考虑先用图片转文字的模型,再拼接 prompt 发送;后来发现有直接支持图片输入的模型。
下面这段是图片转文字的示例:
# 2. Plain text generation with qwen-turbo.
import os
from http import HTTPStatus

import dashscope

# SECURITY: an API key was previously hardcoded on this line and committed
# to source. Read it from the environment instead, and rotate the leaked key.
dashscope.api_key = os.getenv("DASHSCOPE_API_KEY", "")


def call_with_messages():
    """Call qwen-turbo with a fixed two-message chat and print the result.

    Side effects:
        On HTTP 200 prints the whole response object; otherwise prints a
        single formatted line with request id, status code, error code and
        error message. Returns None.
    """
    messages = [
        {'role': 'system', 'content': 'You are a helpful poster designer.'},
        # NOTE(review): this user content looks like a leftover fragment —
        # confirm the intended prompt.
        {'role': 'user', 'content': '");'},
    ]
    response = dashscope.Generation.call(
        dashscope.Generation.Models.qwen_turbo,
        messages=messages,
        result_format='message',  # set the result to be "message" format.
    )
    if response.status_code == HTTPStatus.OK:
        print(response)
    else:
        print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
            response.request_id, response.status_code,
            response.code, response.message
        ))


if __name__ == '__main__':
    call_with_messages()
3. 识别多个图像(已弃用)
# Build an OFA image-captioning pipeline and caption one sample image.
img_captioning = pipeline(
    Tasks.image_captioning,
    model='damo/ofa_image-caption_coco_large_en',
    model_revision='v1.0.1',
)
single_result = img_captioning(
    'https://shuangqing-public.oss-cn-zhangjiakou.aliyuncs.com/donuts.jpg')
# 'a bunch of donuts on a wooden board with popsicle sticks'
print(single_result[OutputKeys.CAPTION])

# Captioning now also supports batch inference:
batch_inputs = [
    {'image': 'https://shuangqing-public.oss-cn-zhangjiakou.aliyuncs.com/donuts.jpg'}
    for _ in range(3)
]
batch_results = img_captioning(batch_inputs, batch_size=2)
for item in batch_results:
    print(item[OutputKeys.CAPTION])
4. 流式输出(一个字一个字输出)
def sample_sync_call_streaming():
    """Stream a qwen-turbo completion and print it incrementally.

    Each streamed item carries the full text generated so far; this
    function redraws the current output line with '\r' and, once a
    newline appears in the accumulated text, only redraws what follows
    the last newline. Returns None; output goes to stdout.
    """
    # The instruction (prompt) to generate from.
    prompt_text = '用萝卜、土豆、茄子做饭,给我个菜谱。'
    # Request a streaming response from dashscope.
    response_generator = dashscope.Generation.call(
        model='qwen-turbo',
        prompt=prompt_text,
        stream=True,
        top_p=0.8)
    head_idx = 0
    # Walk the response stream.
    for resp in response_generator:
        # Full text accumulated so far in this stream item.
        paragraph = resp.output['text']
        # Redraw the tail after the last printed newline.
        # (Original sliced paragraph[head_idx:len(paragraph)] — redundant end.)
        print("\r%s" % paragraph[head_idx:], end='')
        # Hoist the rfind: the original computed it twice per iteration.
        newline_at = paragraph.rfind('\n')
        if newline_at != -1:
            head_idx = newline_at + 1


# Run the streaming sample.
sample_sync_call_streaming()