本地化部署Fastgpt+One-API+ChatGLM3-6b知识库_fastgpt本地部署接入oneapi 知识库(1)

最新推荐文章于 2024-05-03 22:10:56 发布

蚂蚁程序员香蕉

最新推荐文章于 2024-05-03 22:10:56 发布

阅读量596

点赞数 3

分类专栏： 2024年程序员学习 ｜ 文章标签： oneapi

本文链接：https://blog.csdn.net/m0_60635283/article/details/137443132

版权

    # CallFunction
    if isinstance(function_call, dict):
        function_call = FunctionCallResponse(**function_call)


        """
        In this demo, we did not register any tools.
        You can use the tools that have been implemented in our `tool_using` and implement your own streaming tool implementation here.
        Similar to the following method:
            function_args = json.loads(function_call.arguments)
            tool_response = dispatch_tool(tool_name: str, tool_params: dict)
        """
        tool_response = ""

        if not gen_params.get("messages"):
            gen_params["messages"] = []

        gen_params["messages"].append(ChatMessage(
            role="assistant",
            content=output,
        ))
        gen_params["messages"].append(ChatMessage(
            role="function",
            name=function_call.name,
            content=tool_response,
        ))

        # Streaming output of results after function calls
        generate = predict(request.model, gen_params)
        return EventSourceResponse(generate, media_type="text/event-stream")

    else:
        # Handled to avoid exceptions in the above parsing function process.
        generate = parse_output_text(request.model, output)
        return EventSourceResponse(generate, media_type="text/event-stream")

# Here is the handling of stream = False
response = generate_chatglm3(model, tokenizer, gen_params)

# Remove the first newline character
if response["text"].startswith("\n"):
    response["text"] = response["text"][1:]
response["text"] = response["text"].strip()
usage = UsageInfo()
function_call, finish_reason = None, "stop"
if request.functions:
    try:
        function_call = process_response(response["text"], use_tool=True)
    except:
        logger.warning("Failed to parse tool call, maybe the response is not a tool call or have been answered.")

if isinstance(function_call, dict):
    finish_reason = "function_call"
    function_call = FunctionCallResponse(**function_call)

message = ChatMessage(
    role="assistant",
    content=response["text"],
    function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
)

logger.debug(f"==== message ====\n{message}")

choice_data = ChatCompletionResponseChoice(
    index=0,
    message=message,
    finish_reason=finish_reason,
)
task_usage = UsageInfo.model_validate(response["usage"])
for usage_key, usage_value in task_usage.model_dump().items():
    setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion", usage=usage)

async def predict(model_id: str, params: dict):
global model, tokenizer

choice_data = ChatCompletionResponseStreamChoice(
    index=0,
    delta=DeltaMessage(role="assistant"),
    finish_reason=None
)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
yield "{}".format(chunk.model_dump_json(exclude_unset=True))

previous_text = ""
for new_response in generate_stream_chatglm3(model, tokenizer, params):
    decoded_unicode = new_response["text"]
    delta_text = decoded_unicode[len(previous_text):]
    previous_text = decoded_unicode

    finish_reason = new_response["finish_reason"]
    if len(delta_te