from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.llms import QianfanLLMEndpoint
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
import os
# Initialise the LLM.
llm_qianfan = QianfanLLMEndpoint(temperature=0.1)

# Load the source document.
loder_txt = TextLoader(r'D:\PycharmProjects\MyAgent\texts\text1.txt', encoding='utf8')
docs_txt = loder_txt.load()

# Split the document into chunks of at most 100 characters, with no overlap.
# The recursive splitter walks `separators` in order and stops at the first one
# that applies; the empty string always applies, so it has to be the last
# entry. Otherwise the sentence/clause separators listed after it are never
# tried and the text is cut at arbitrary character positions.
text_splitter_txt = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
    separators=["\n\n", "\n", "。", ",", " ", ""],
)
documents_txt = text_splitter_txt.split_documents(docs_txt)

# Embedding model used for the vector store.
embeddings_qf = QianfanEmbeddingsEndpoint()
# Import the documents into the vector database; if the store was already
# generated on a previous run, open it instead of importing again.
# The existence check looks for chroma.sqlite3 inside the same directory that
# is passed as persist_directory, so the check and the store cannot disagree
# when the script is started from a different working directory.
persist_dir = r"D:\PycharmProjects\MyAgent"  # raw string: "\P" and "\M" are not valid escapes
if not os.path.exists(os.path.join(persist_dir, "chroma.sqlite3")):
    vectordb = Chroma.from_documents(
        documents=documents_txt,
        embedding=embeddings_qf,
        persist_directory=persist_dir,
    )
else:
    vectordb = Chroma(
        persist_directory=persist_dir,
        embedding_function=embeddings_qf,
    )
# Build the prompt: the template text is kept in its own name and then turned
# into a ChatPromptTemplate. It expects two variables, {context} and {input}.
_qa_template = """使用下面的语料来回答本模板最末尾的问题。如果你不知道问题的答案,直接回答"抱歉,这个问题我还不清楚。",禁止随意编造答案。
为了保证答案尽可能简洁,你的回答必须不超过三句话,你的回答中不可以带有星号。
请注意!在每次回答结束之后,你都必须接上"感谢您的提问。"作为结束语
以下是一对问题和答案的样例:
请问:秦始皇的原名是什么?
秦始皇原名嬴政。感谢您的提问。
以下是语料:
<context>
{context}
</context>
Question:{input}"""
prompt = ChatPromptTemplate.from_template(_qa_template)
# Build the retrieval chain.
# NOTE(review): document_chain is created here but is not passed to `qa`
# below, so the custom prompt does not take part in the answer that gets
# printed. Confirm whether it was meant to be wired in.
document_chain = create_stuff_documents_chain(llm_qianfan, prompt)
retriever = vectordb.as_retriever()

# Summarising conversation memory exposed to the chain as "chat_history".
# The option is spelled `return_messages`; the previous `return_message`
# did not match it, so the intended setting was not applied.
memory = ConversationSummaryMemory(
    llm=llm_qianfan,
    memory_key="chat_history",
    return_messages=True,
)

# Conversational retrieval chain over the vector store, using the memory above.
qa = ConversationalRetrievalChain.from_llm(llm=llm_qianfan, retriever=retriever, memory=memory)

# Ask one question and print the chain's answer.
res = qa.invoke({"question": "根据勾股定理能推断出什么理论?"})
print(res["answer"])
# 用create_stuff_documents_chain构建一个完整的知识问答模型
# 最新推荐文章于 2024-12-14 04:36:09 发布