# RAG demo: answers questions over a local PDF file.
import os
from operator import itemgetter

from langchain import hub
from langchain.chains import (
    LLMChain,
    LLMMathChain,
    SimpleSequentialChain,
    create_retrieval_chain,
)
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import JinaEmbeddings
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

import ChatGLM  # local module (was imported twice in the original; deduplicated)
# pip install langchainhub
# https://jina.ai/embeddings/
# https://python.langchain.com/docs/integrations/text_embedding/jina
# demo: https://python.langchain.com/cookbook
# Build a retrieval-augmented QA chain:
# PDF -> page chunks -> Jina embeddings -> Chroma vector store -> retriever
# -> stuff-documents chain driven by a standard hub prompt.

# Standard retrieval-QA chat prompt published on LangChain Hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Local ChatGLM wrapper used as the LLM backend.
llm = ChatGLM.ChatGLM_LLM()

# Load the source PDF and split it into per-page Document chunks.
loader = PyPDFLoader("3399.pdf")
documents = loader.load_and_split()

# SECURITY: the Jina API key was hard-coded here (a leaked credential).
# It is now read from the JINA_API_KEY environment variable; the old
# literal remains only as a backward-compatible fallback. Rotate that
# key and delete the fallback.
embeddings = JinaEmbeddings(
    jina_api_key=os.getenv(
        "JINA_API_KEY",
        "jina_7e2c88997a50417aab497c15a4c6cec7vuBoG_CK-_0gYILG38ZIoJHTL1_q",
    ),
    model_name="jina-embeddings-v2-base-en",
)

# Index the chunks and expose the store as a retriever.
vectorstore = Chroma.from_documents(documents, embeddings)
retriever = vectorstore.as_retriever()

# Chain that stuffs retrieved docs into the prompt, wrapped in the full
# retrieval chain (query -> retrieve -> answer).
combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

# print(retrieval_chain.invoke({"input": "content summary,no more than 100 words"}))
print(retrieval_chain.invoke({"input": "the function of Watchdog, no more than 100 words"}))