import json
import os
from typing import Optional

import requests
def model_complete_chat(
    model: str,
    prompt: str,
    system_prompt: Optional[str] = None,
    *,
    timeout: float = 120.0,
) -> Optional[str]:
    """Send a single-turn chat-completion request and return the reply text.

    Args:
        model: Model name placed in the request's ``model`` field.
        prompt: Content of the ``user`` message.
        system_prompt: Optional content of a ``system`` message sent ahead of
            the user message. Left out of the request when ``None`` or empty.
        timeout: Seconds to wait for the HTTP request; passed straight to
            ``requests.post`` so a stalled connection cannot hang forever.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        response carries no choices or the call fails for any reason. Failures
        are printed, never raised.
    """
    host = "https://api.zhizengzeng.com/v1/chat/completions"
    # Prefer a key from the ZHIZENGZENG_API_KEY environment variable so a real
    # credential does not have to live in the source; the original placeholder
    # stays as the fallback.
    api_key = os.environ.get("ZHIZENGZENG_API_KEY", 'xxx')

    # Request headers.
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Request body: the optional system message goes first, then the user turn.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    data = {
        "model": model,
        "messages": messages,
    }

    try:
        # Send the POST request.
        response = requests.post(
            host, headers=headers, json=data, timeout=timeout
        )
        # Report 4xx/5xx responses as HTTP errors instead of letting them show
        # up as a KeyError on 'choices' further down.
        response.raise_for_status()
        response_json = response.json()
        if res_choices := response_json['choices']:
            return res_choices[0]['message']['content']
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall through
        # to the None return below rather than crash the caller.
        print(f"调用大模型补全失败,{e}")
    return None
# Example run: ask the model about recent progress in speculative decoding and
# about EAGLE-2, then print whatever model_complete_chat returns.
# NOTE(review): these statements execute at module level, so the call is made
# as soon as the file is run or imported.
prompt = (
    "\n"
    "请帮我分析投机解码的前沿进展,是否有比eagle2效果更好的方案,另外请分析下eagle2的优化点有哪些,如何进一步提升加速效果?"
    "\n"
)
result = model_complete_chat(
    model="gemini-2.0-flash-thinking-exp",
    prompt=prompt,
)
print(result)
# 聊天窗口 (chat window):
# http://gpt.zhizengzeng.com:16888/#/chat/
# 可以选择或输入模型名称 (a model name can be selected or typed in)