LLM-RAG
NVIDIA AI Foundation Endpoints | 🦜️🔗 LangChain
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import getpass
import os
# Prompt for an NVIDIA API key unless one with the expected "nvapi-" prefix
# is already present in the environment.
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvapi_key = getpass.getpass("Enter your NVIDIA API key: ")
    # Validate with an explicit raise rather than `assert`: assertions are
    # removed when Python runs with -O, which would let a bad key through.
    if not nvapi_key.startswith("nvapi-"):
        raise ValueError(f"{nvapi_key[:5]}... is not a valid key")
    os.environ["NVIDIA_API_KEY"] = nvapi_key
定义html_document_loader网络文档数据加载器:进行文档解析返回文档内容
import re
from typing import List, Union
import requests
from bs4 import BeautifulSoup
def html_document_loader(url: Union[str, bytes]) -> str:
    """
    Load the HTML document at a URL and return its visible text.

    Script and style elements are removed and every run of whitespace is
    collapsed into a single space.

    Args:
        url: The URL of the document.
    Returns:
        The cleaned text of the document, or an empty string when the
        request or the parsing fails. Failures are printed, not raised.
    """
    try:
        # requests applies no timeout by default; set one so an unresponsive
        # server cannot block the caller forever.
        response = requests.get(url, timeout=30)
        # Treat HTTP error statuses as failures so an error page is never
        # returned (and later indexed) as document content.
        response.raise_for_status()
        html_content = response.text
    except Exception as e:
        # Best-effort loader: report the problem and signal it with "".
        print(f"Failed to load {url} due to exception {e}")
        return ""
    try:
        # Parse the HTML with Beautiful Soup.
        soup = BeautifulSoup(html_content, "html.parser")
        # Drop script and style elements; their text is not page content.
        for script in soup(["script", "style"]):
            script.extract()
        # Extract the plain text of the document.
        text = soup.get_text()
        # Collapse whitespace runs (raw string avoids an invalid-escape warning).
        text = re.sub(r"\s+", " ", text).strip()
        return text
    except Exception as e:
        print(f"Exception {e} while loading document")
        return ""
定义数据向量化工具
def create_embeddings(embedding_path: str = "./embed"):
    """
    Download the NeMo toolkit documentation pages, split them into chunks
    and store their embeddings under ``embedding_path``.

    Each URL is loaded with ``html_document_loader``, chunked with a
    ``RecursiveCharacterTextSplitter`` (1000 characters, no overlap) and
    handed to ``index_docs``. URLs whose content could not be loaded are
    skipped.

    Args:
        embedding_path: Directory in which the embeddings are stored.
    """
    # The parameter is used as given; it is no longer overwritten with a
    # hard-coded "./embed" inside the function.
    print(f"Storing embeddings to {embedding_path}")
    # Web pages that contain the NVIDIA NeMo toolkit technical documentation.
    urls = [
        "https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/"
    ]
    # Splitter used to cut the page text into chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )
    indexed_any = False
    for url in urls:
        document = html_document_loader(url)
        if not document:
            # The loader returns "" on failure (and has already reported it);
            # there is nothing to index for this URL.
            continue
        texts = text_splitter.create_documents([document])
        # Index each page together with its own URL instead of relying on
        # the loop variable after the loop has finished.
        index_docs(url, text_splitter, texts, embedding_path)
        indexed_any = True
    if indexed_any:
        print("Generated embedding successfully")
    else:
        print("No documents could be loaded; no embeddings were generated")
定义index_docs函数作为构建向量储存和文档检索工具
def index_docs(url: Union[str, bytes], splitter, documents: List, dest_embed_dir) -> None:
    """
    Split the documents into chunks, embed them and persist them in a FAISS
    store located at ``dest_embed_dir``.

    If ``dest_embed_dir`` already exists the stored index is loaded and
    extended; otherwise a new index is created. Nothing is embedded or
    written when the documents yield no text chunks.

    Args:
        url: Source url for the document (currently not used by this function).
        splitter: Splitter used to split the document; must provide
            ``split_text``.
        documents: Document objects exposing ``page_content`` and
            ``metadata`` whose embeddings need to be created.
        dest_embed_dir: Destination directory for embeddings.
    Returns:
        None
    """
    # Gather every chunk first so the store is loaded and saved once per
    # call instead of once per document.
    texts = []
    metadatas = []
    for document in documents:
        chunks = splitter.split_text(document.page_content)
        texts.extend(chunks)
        # One metadata entry per chunk, so the metadata list always has the
        # same length as the text list.
        metadatas.extend(dict(document.metadata) for _ in chunks)
    if not texts:
        # Nothing to embed; avoid creating an empty index.
        return
    # Use the "ai-embed-qa-4" embedding model through NVIDIAEmbeddings.
    embeddings = NVIDIAEmbeddings(model="ai-embed-qa-4")
    # Create the embeddings and store the vectors with FAISS.
    if os.path.exists(dest_embed_dir):
        # NOTE(security): load_local deserializes pickled data, hence the
        # explicit opt-in flag. Only point this at directories you trust.
        store = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True)
        store.add_texts(texts, metadatas=metadatas)
    else:
        store = FAISS.from_texts(texts, embedding=embeddings, metadatas=metadatas)
    store.save_local(folder_path=dest_embed_dir)
使用定义好的相关函数和工具执行文档嵌入Embeddings的生成
# Single definition of the index location, shared by the code that writes
# the embeddings and the code that loads them back.
embedding_path = "embed/"
create_embeddings(embedding_path)
embedding_model = NVIDIAEmbeddings(model="ai-embed-qa-4")
# Load the persisted FAISS index for retrieval.
# NOTE(security): load_local deserializes pickled data, hence the explicit
# opt-in flag. Only load index directories you created yourself.
docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True)
LangChain结合NIM实现LLM-RAG检索,并对比未使用RAG的LLM输出与使用LLM-RAG的输出效果
# Baseline without retrieval: send the question straight to the chat model
# and print the text of its reply.
llm = ChatNVIDIA(model="ai-mixtral-8x7b-instruct")
result = llm.invoke("Tell me something about nemo")
print(result.content)
输出如下
Nemo is a popular name for a clownfish, which is a type of fish that lives in warm waters of the Pacific and Indian Oceans. The name Nemo was made famous by the popular animated movie "Finding Nemo" produced by Pixar Animation Studios in 2003.
In the movie, Nemo is a young clownfish who gets separated from his father, Marlin, and embarks on an exciting journey through the ocean to find his way back home. Along the way, Nemo meets many colorful characters, including a forgetful blue tang fish named Dory, a group of vegetarian sharks, and a friendly whale.
Clownfish are known for their bright orange color and distinctive white stripes. They have a special relationship with sea anemones, which they use for shelter and protection. Clownfish are immune to the stinging cells of the anemone and help to keep it clean by removing parasites and dead tissue.
"Nemo" has become a beloved character for people of all ages, and the movie has helped to raise awareness about the importance of ocean conservation and the unique animals that live there.
# Model used to condense the chat history and the follow-up question into a
# standalone question.
llm = ChatNVIDIA(model="ai-mixtral-8x7b-instruct")
# Conversation memory, exposed to the chain under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# Model that writes the final answer from the retrieved chunks.
chat = ChatNVIDIA(model="ai-mixtral-8x7b-instruct", temperature=0.1, max_tokens=1000, top_p=1.0)
# "stuff" chain: all retrieved documents are placed into one QA prompt.
doc_chain = load_qa_chain(chat, chain_type="stuff", prompt=QA_PROMPT)
qa = ConversationalRetrievalChain(
    retriever=docsearch.as_retriever(),
    combine_docs_chain=doc_chain,
    memory=memory,
    question_generator=question_generator,
)
query = "Tell me something about nemo"
# Use invoke(), as the baseline LLM call does; calling the chain object
# directly is deprecated in LangChain.
result = qa.invoke({"question": query})
rag_result = result.get("answer")
# Print the answer so it is also shown when this runs as a plain script.
print(rag_result)
输出如下
'NVIDIA NeMo Framework is an end-to-end, cloud-native framework designed to build, customize, and deploy generative AI models anywhere. It supports large-scale training features, including Mixed Precision Training. The framework is well-documented with getting started guides, tutorials, and example scripts for pretraining and fine-tuning. It also provides NeMo APIs, NeMo Collections, and Speech AI Tools, along with the NeMo Framework Launcher, NeMo Aligner, and NeMo Curator. NVIDIA NeMo Framework is particularly useful for Large Language Models, Embedding Models, Multimodal Models, and Speech AI Models.'