LangChain:PDF 的读取以及向量数据库的使用

文章介绍了如何使用Python库LangChain和JinaEmbeddings对PDF文件进行文本分析,通过嵌入和向量存储技术,实现基于上下文的问题回答功能。首先加载PDF文档,提取内容并生成嵌入向量,然后通过Chroma进行检索和提问,最后使用LangChain进行自然语言处理和输出解析。
摘要由CSDN通过智能技术生成

参考: https://www.pinecone.io/learn/series/langchain/langchain-expression-language/

以下示例使用 3399.pdf(Rockchip RK3399 TRM Part 1)作为输入文档:

import ChatGLM
from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import SimpleSequentialChain
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain_community.document_loaders import PyPDFLoader
import ChatGLM
from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.chains import LLMMathChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_community.embeddings import JinaEmbeddings

# https://jina.ai/embeddings/
# https://python.langchain.com/docs/integrations/text_embedding/jina
# demo:  https://python.langchain.com/cookbook



# Retrieval-augmented question answering over a PDF:
#   1. load and split the PDF into documents,
#   2. embed them with Jina and index them in a Chroma vector store,
#   3. answer a question using only the retrieved context.
import os

# The Jina API key is a secret and must not be committed to source control;
# read it from the environment instead of hard-coding it.
jina_api_key = os.environ.get("JINA_API_KEY")
if not jina_api_key:
    raise SystemExit(
        "Set the JINA_API_KEY environment variable before running this script."
    )

loader = PyPDFLoader("3399.pdf")
documents = loader.load_and_split()

embeddings = JinaEmbeddings(
    jina_api_key=jina_api_key, model_name="jina-embeddings-v2-base-en"
)

# No persist_directory is given here, so the index is rebuilt on every run.
vectorstore = Chroma.from_documents(documents, embeddings)
retriever = vectorstore.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# Build the LLM once; the original constructed it twice and discarded the first.
llm = ChatGLM.ChatGLM_LLM()
output_parser = StrOutputParser()
# The incoming question is sent both to the retriever (to fill {context}) and
# straight through unchanged (to fill {question}).
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | llm | output_parser

print(chain.invoke("eFuse Function Description"))





更新之后的版本(改用中文嵌入模型 jina-embeddings-v2-base-zh,并将向量库持久化到本地目录 ./chroma_db):

import ChatGLM

from langchain_community.document_loaders import PyPDFLoader

from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.embeddings import JinaEmbeddings
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


# Retrieval-augmented question answering over a PDF with a Chroma index that
# is persisted on disk, so the PDF only has to be parsed and embedded once.
import os
from pathlib import Path

PDF_PATH = "西游记.pdf"
PERSIST_DIR = "./chroma_db"

# The Jina API key is a secret and must not be committed to source control;
# read it from the environment instead of hard-coding it.
jina_api_key = os.environ.get("JINA_API_KEY")
if not jina_api_key:
    raise SystemExit(
        "Set the JINA_API_KEY environment variable before running this script."
    )

embeddings = JinaEmbeddings(
    jina_api_key=jina_api_key, model_name="jina-embeddings-v2-base-zh"
)

persist_path = Path(PERSIST_DIR)
if persist_path.is_dir() and any(persist_path.iterdir()):
    # A non-empty persist directory already exists: open it without touching
    # the PDF, so nothing is re-parsed or re-embedded.
    vectorstore = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
else:
    # First run: parse the PDF, embed it, and write the index to PERSIST_DIR.
    documents = PyPDFLoader(PDF_PATH).load_and_split()
    vectorstore = Chroma.from_documents(
        documents, embeddings, persist_directory=PERSIST_DIR
    )

retriever = vectorstore.as_retriever()
template = """Answer the question based only on the following context,if can not ,please just say: I do not know,
please think step by step:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# Build the LLM once; the original constructed it twice and discarded the first.
llm = ChatGLM.ChatGLM_LLM()
output_parser = StrOutputParser()
# The incoming question is sent both to the retriever (to fill {context}) and
# straight through unchanged (to fill {question}).
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | llm | output_parser

# Alternative query kept from the original for experimentation: "介绍下红楼梦"
print(chain.invoke("第二十二回讲了什么"))

  • 3
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值