vLLM 用于 LLM 推理服务和客户端的访问方式 3 - 流式
flyfish
流式访问,包括如何解析流式响应数据。
import requests
import json
# --- Configuration ---
BASE_URL = "http://0.0.0.0:8000/v1"
API_KEY = "token-abc123"
MODEL_NAME = "LLM-Research/Meta-Llama-3-8B-Instruct"
INPUT_CONTENT = "Who are you?"
# Request headers: JSON body plus the bearer token the vLLM server expects
HEADERS = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {API_KEY}'
}
# Request body (OpenAI-compatible chat-completion payload)
DATA = {
"model": MODEL_NAME,
"messages": [
{"role": "user", "content": INPUT_CONTENT}
],
"top_k": 50, # number of highest-probability tokens considered at each step
"temperature": 0.7, # randomness of the generated text
"max_tokens": 10, # maximum number of tokens to generate
"presence_penalty": 0.1, # penalty for tokens already present in the text
"frequency_penalty": 0.1, # penalty scaled by how often a token has appeared
"stream": True # request a streaming (SSE) response instead of one JSON blob
}
def send_request(url, headers, data, timeout=None):
    """Send a POST request and return the streaming response.

    Args:
        url: Full endpoint URL (e.g. ``{BASE_URL}/chat/completions``).
        headers: HTTP headers including content type and authorization.
        data: JSON-serializable request body.
        timeout: Optional requests timeout — seconds, or a
            ``(connect, read)`` tuple. Defaults to ``None`` to preserve
            the original behavior, but passing a value prevents the call
            from hanging indefinitely on an unresponsive server.

    Returns:
        A ``requests.Response`` opened with ``stream=True`` so the body
        can be consumed incrementally via ``iter_lines()``.
    """
    response = requests.post(url, headers=headers, json=data,
                             stream=True, timeout=timeout)
    return response
def process_stream_response(response):
    """Consume a streaming (SSE) chat-completion response, printing tokens live.

    Each event line has the form ``data: {...json chunk...}``; the stream
    ends with the sentinel ``data: [DONE]``. Every chunk's
    ``choices[0].delta.content`` fragment is appended to the result and
    echoed immediately.

    Args:
        response: A response object whose ``iter_lines()`` yields raw
            ``bytes`` lines of the event stream.

    Returns:
        The full generated text accumulated from all delta chunks.
    """
    generated_text = ""
    for line in response.iter_lines():
        if not line:  # skip SSE keep-alive / blank separator lines
            continue
        decoded_line = line.decode('utf-8')
        print(f"Received data: {decoded_line}")
        if not decoded_line.startswith('data:'):
            continue
        json_data = decoded_line[len('data:'):].strip()
        if json_data == '[DONE]':  # sentinel marking end of stream
            break
        data = json.loads(json_data)
        choices = data.get('choices', [])
        if choices:
            delta = choices[0].get('delta', {})
            # 'content' may be absent or explicitly null (e.g. the initial
            # role-only chunk); coerce to '' so concatenation never fails.
            new_text = delta.get('content') or ''
            generated_text += new_text
            # flush so each token appears immediately instead of sitting
            # in the stdout buffer — the point of streaming output
            print(new_text, end='', flush=True)
    return generated_text
def main():
    """Entry point: send the streaming chat request and print the result."""
    endpoint = f"{BASE_URL}/chat/completions"
    resp = send_request(endpoint, HEADERS, DATA)
    # Guard clause: bail out early on any non-OK status.
    if resp.status_code != 200:
        print(f"Request failed with status code {resp.status_code}: {resp.text}")
        return
    final_text = process_stream_response(resp)
    print("Final Generated Text:", final_text)
if __name__ == "__main__":
    main()
接收数据
Received data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
Received data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"I"},"logprobs":null,"finish_reason":null}]}
IReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" am"},"logprobs":null,"finish_reason":null}]}
amReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" L"},"logprobs":null,"finish_reason":null}]}
LReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"La"},"logprobs":null,"finish_reason":null}]}
LaReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"MA"},"logprobs":null,"finish_reason":null}]}
MAReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}
,Received data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" an"},"logprobs":null,"finish_reason":null}]}
anReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" AI"},"logprobs":null,"finish_reason":null}]}
AIReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" assistant"},"logprobs":null,"finish_reason":null}]}
assistantReceived data: data: {"id":"chat-1","object":"chat.completion.chunk","created":1730428960,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" developed"},"logprobs":null,"finish_reason":"length","stop_reason":null}]}
developedReceived data: data: [DONE]
Final Generated Text: I am LLaMA, an AI assistant developed
流式接收
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"I"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" am"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" L"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"La"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":"MA"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" an"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" AI"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" assistant"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-1","object":"chat.completion.chunk","created":1730429430,"model":"LLM-Research/Meta-Llama-3-8B-Instruct","choices":[{"index":0,"delta":{"content":" developed"},"logprobs":null,"finish_reason":"length","stop_reason":null}]}
data: [DONE]
Final Generated Text: I am LLaMA, an AI assistant developed
解析后
I
am
L
La
MA
,
an
AI
assistant
developed