BaseLLM
Deploy the LLM behind a FastAPI endpoint, then have the custom class call that endpoint.
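The class below assumes a service like the following sketch is running on 127.0.0.1:12612. This is only a hypothetical stub: the request/response shape (a messages list in, a content field out) matches what CustomLLM sends and reads, while the actual model inference is left as a placeholder.

import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    # Fields mirror what CustomLLM/CustomChatModel put in the request body.
    messages: list[dict]
    temperature: float = 0.1
    num_return_sequences: int = 1
    max_new_tokens: int = 1024

@app.post('/chat/completions')
def chat_completions(req: ChatRequest):
    # Placeholder: a real service would run the model's generate step here.
    reply = f"echo: {req.messages[-1]['content']}"
    return {'content': reply}

if __name__ == '__main__':
    uvicorn.run(app, host='127.0.0.1', port=12612)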
import json
import requests
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

class CustomLLM(LLM):
    """Wraps the FastAPI-deployed chat endpoint behind LangChain's LLM interface."""
    temperature: float = 0.1
    num_return_sequences: int = 1
    max_new_tokens: int = 1024
    headers: dict = {'Content-Type': 'application/json'}

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(self,
              prompt: str,
              stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None,
              **kwargs: Any) -> str:
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")
        # Wrap the raw prompt as a single user message in OpenAI-style format.
        message_user = dict(role="user", content=prompt)
        data = {'messages': [message_user], **self._identifying_params}
        data |= kwargs  # per-call kwargs override the defaults above
        json_str_data = json.dumps(data, ensure_ascii=False)
        response = requests.post(url='http://127.0.0.1:12612/chat/completions',
                                 headers=self.headers, data=json_str_data)
        # The service returns the generated text under the "content" key.
        return response.json()["content"]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            'temperature': self.temperature,
            'num_return_sequences': self.num_return_sequences,
            'max_new_tokens': self.max_new_tokens
        }
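A quick usage sketch, assuming the FastAPI service above is running:

llm = CustomLLM(temperature=0.2)
print(llm.invoke("Hello, please introduce yourself."))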
ChatModel
import json
import requests
from typing import Any, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    SystemMessage
)
from langchain_core.outputs import ChatGeneration, ChatResult
class CustomChatModel(BaseChatModel):
    """Chat-style wrapper around the same FastAPI endpoint."""
    temperature: float = 0.1
    num_return_sequences: int = 1
    max_new_tokens: int = 1024
    headers: dict = {'Content-Type': 'application/json'}

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _generate(self,
                  messages: list[BaseMessage],
                  stop: Optional[list[str]] = None,
                  run_manager: Optional[CallbackManagerForLLMRun] = None,
                  **kwargs: Any) -> ChatResult:
        def langchain2custom(lc_message: BaseMessage) -> dict:
            # Map LangChain message types onto the endpoint's role strings;
            # anything that is not a SystemMessage is sent as a user turn.
            if isinstance(lc_message, SystemMessage):
                return dict(role='system', content=lc_message.content)
            return dict(role='user', content=lc_message.content)

        custom_messages = [langchain2custom(mes) for mes in messages]
        data = {'messages': custom_messages, **self._identifying_params}
        data |= kwargs  # per-call kwargs override the defaults above
        json_str_data = json.dumps(data, ensure_ascii=False)
        response = requests.post(url='http://127.0.0.1:12612/chat/completions',
                                 headers=self.headers, data=json_str_data)
        content = response.json()["content"]
        # Wrap the reply as an AIMessage so downstream chains see a chat result.
        out_message = AIMessage(content=content)
        generation = ChatGeneration(message=out_message)
        return ChatResult(generations=[generation])

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            'temperature': self.temperature,
            'num_return_sequences': self.num_return_sequences,
            'max_new_tokens': self.max_new_tokens
        }
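Usage sketch for the chat model; system and user turns are translated by langchain2custom before the request is sent:

chat = CustomChatModel()
reply = chat.invoke([
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="What is LangChain?"),
])
print(reply.content)  # reply is an AIMessage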
bind tools
You can refer to the bind_tools method of BaseChatOpenAI (from langchain_openai.chat_models.base); a sketch of that pattern is shown below.
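A minimal sketch of what such an override might look like, following the BaseChatOpenAI pattern: each tool is converted to the OpenAI tool schema and bound so that a tools field rides along in kwargs, which _generate above merges into the request body. Whether the backing service can actually consume a tools field is an assumption here.

from typing import Any, Callable, Sequence, Union

from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool

class CustomChatModelWithTools(CustomChatModel):
    def bind_tools(self,
                   tools: Sequence[Union[dict, type, Callable, BaseTool]],
                   **kwargs: Any) -> Runnable[LanguageModelInput, BaseMessage]:
        # Convert each tool to the OpenAI tool schema, then bind it so the
        # 'tools' list is merged into every request body by _generate above.
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        return self.bind(tools=formatted_tools, **kwargs)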