"""RAG demo: load a web page, split it into chunks, embed the chunks into a
Chroma vector store, then answer questions over it with a RetrievalQA chain
and a memory-backed ConversationalRetrievalChain.

Requires an OpenAI API key; the placeholders ("") below must be filled in
before running. An HTTP(S) proxy can be configured for external access.
"""

from langchain.llms import OpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import os

# llm = OpenAI(openai_api_key="")  # fill in your OpenAI API key here

# Set proxies so outbound requests (page fetch, OpenAI API) can reach the
# internet from behind a firewall. Empty string = no proxy.
os.environ['http_proxy'] = ''
os.environ['https_proxy'] = ''

# Fetch the web page that will seed the vector store.
loader = WebBaseLoader("")  # website URL
data = loader.load()
# One-shot alternative: build an index directly from the loader and query it.
# index = VectorstoreIndexCreator().from_loaders([loader])
# index.query("")  # query text

# Split the fetched documents into 500-character chunks with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Embed the chunks with OpenAI embeddings and store them in Chroma
# (Chroma persists its data locally, backed by sqlite).
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# Nearest-neighbour similarity search over the stored vectors.
# NOTE(review): the original comment claimed KNN with cosine/Euclidean/
# Manhattan options — the actual metric is Chroma's default; verify if it matters.
question = ""
docs = vectorstore.similarity_search(question)

# temperature=0 -> always pick the most probable token (minimal diversity).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# Question-answering chain that retrieves relevant chunks before answering.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
qa_chain({"query": question})

# Add conversation memory so follow-up questions keep chat context.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
retriever = vectorstore.as_retriever()
chat = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
# result = chat({"question": ""})
# result['answer']
03-27
1198
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
12-13
2825
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交