"""Gradio chat frontend for an OpenAI-compatible (vLLM) chat API."""
from typing import Iterator
import argparse

import gradio as gr
from openai import OpenAI

parser = argparse.ArgumentParser()
# Port for this Gradio web UI (not the vLLM backend on :8000).
# Fix: default was the string '8005'; argparse only coerced it because
# string defaults are passed through `type` — an int default is correct.
parser.add_argument('-p', '--client_port', type=int, default=8005)
args = parser.parse_args()
client_port = args.client_port

# Set OpenAI's API key and API base to use vLLM's API server.
# vLLM's OpenAI-compatible server does not check the key; any placeholder works.
openai_api_key = "EMPTY"
openai_api_base = "http://127.0.0.1:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
def login(username, password):
    """Gradio auth hook; return True to grant access.

    NOTE(review): placeholder — currently accepts ANY username/password.
    Plug in real credential validation before exposing this service.
    """
    # TODO: validate (username, password) against a real credential store.
    return True
def generate(
    message: str,
    chat_history: list,
    request: gr.Request,
    max_new_tokens: int = 4096,
) -> Iterator[str]:
    """Stream a chat completion for `message`, given prior `chat_history`.

    `chat_history` is a list of (user, assistant) turn pairs, as supplied by
    gr.ChatInterface. Yields the accumulated assistant reply after each
    streamed chunk so the UI renders incrementally.
    """
    model = "deepseek-coder-33B-instruct-AWQ"
    system_prompt = "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
    # Bug fix: the original reset `messages = []` immediately after adding
    # the system prompt, so the system prompt was never sent to the model.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    chat_response = client.chat.completions.create(
        user=request.username,  # tag requests with the authenticated user
        model=model,
        messages=messages,
        max_tokens=max_new_tokens,
        stop=['<|BOT|>', '<|EOT|>', '<|im_end|>', '<|endoftext|>'],
        # top_p intentionally left at server default
        stream=True,
    )
    outputs = ''
    for chunk in chat_response:
        # Delta content may be None on role/terminal chunks; treat as "".
        delta = chunk.choices[0].delta.content
        outputs += delta if delta is not None else ""
        yield outputs
# Canned prompts shown under the input box.
_example_prompts = [
    ["用python给我写一个快排代码"],
    ["什么是二叉树"],
    ["nginx允许跨域吗"],
]

# Chat UI wired to `generate`; at most 10 concurrent generations.
chat_interface = gr.ChatInterface(
    fn=generate,
    title="Deepseek代码助手",
    theme="soft",
    concurrency_limit=10,
    examples=_example_prompts,
)
if __name__ == "__main__":
    # Bind on all interfaces; `root_path` assumes a reverse proxy serving
    # this app under /chat_code_web. `login` gates access.
    chat_interface.queue().launch(
        server_name="0.0.0.0",
        server_port=client_port,
        root_path="/chat_code_web",
        auth=login,
    )
# Purpose: a Gradio page that talks to an OpenAI-format chat API.
# (Stray scraped-article metadata neutralized — original text:
#  "最新推荐文章于 2024-05-06 00:08:33 发布" — it was not valid Python.)