Note: both exercises below build on RAG in Practice 1.
I. RAG in Practice 2: How to store and load embedding vectors with LlamaIndex
1. Storing
import logging
import sys
import torch
from llama_index.core import PromptTemplate, Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
# Configure logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# Define the system prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)
# Create the local LLM with llama-index
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name='/mnt/workspace/Qwen1.5-4B-Chat',
    model_name='/mnt/workspace/Qwen1.5-4B-Chat',
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
Settings.llm = llm
# Build the local embedding model with llama-index-embeddings-huggingface
Settings.embed_model = HuggingFaceEmbedding(
    model_name="/mnt/workspace/bge-base-zh-v1.5"
)
# Load the documents
documents = SimpleDirectoryReader("document").load_data()
# Split the documents into chunks, embed each chunk, and build the vector index
index = VectorStoreIndex.from_documents(documents, transformations=[SentenceSplitter(chunk_size=256)])
# Persist the embedding vectors and the vector index to disk
index.storage_context.persist(persist_dir='doc_emb')
# Build the query engine
query_engine = index.as_query_engine(similarity_top_k=5)
# Query and print the answer
response = query_engine.query("不耐疲劳,口燥、咽干可能是哪些证候?")
print(response)
The key line is index.storage_context.persist(persist_dir='doc_emb'), where persist_dir is the directory the embedding vectors and vector index are written to.
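By default, persist() writes a small set of JSON files (document store, index store, vector store) into that directory. A minimal sketch to confirm what was written; the concrete file names mentioned in the comment (e.g. docstore.json, default__vector_store.json) depend on the LlamaIndex version and are an assumption, not part of the original code:

import os

# List the files that persist() produced in doc_emb (names vary by LlamaIndex version,
# typically docstore.json, index_store.json and default__vector_store.json)
for name in sorted(os.listdir('doc_emb')):
    path = os.path.join('doc_emb', name)
    print(f"{name}: {os.path.getsize(path)} bytes")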
2. Loading (using LlamaIndex to read the embedding vectors and vector index back from the persisted structured files)
import logging
import sys
import torch
from llama_index.core import PromptTemplate, Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
# Configure logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# Define the system prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)
# Create the local LLM with llama-index
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name='/mnt/workspace/Qwen1.5-4B-Chat',
    model_name='/mnt/workspace/Qwen1.5-4B-Chat',
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
Settings.llm = llm
# Build the local embedding model with llama-index-embeddings-huggingface
Settings.embed_model = HuggingFaceEmbedding(
    model_name="/mnt/workspace/bge-base-zh-v1.5"
)
# Load the embedding vectors and vector index from the persisted files
storage_context = StorageContext.from_defaults(persist_dir="doc_emb")
index = load_index_from_storage(storage_context)
# Build the query engine
query_engine = index.as_query_engine(similarity_top_k=5)
# Query and print the answer
response = query_engine.query("不耐疲劳,口燥、咽干可能是哪些证候?")
print(response)
The key lines are:
storage_context = StorageContext.from_defaults(persist_dir="doc_emb")
index = load_index_from_storage(storage_context)
StorageContext.from_defaults(persist_dir="doc_emb") loads the embedding vectors and vector index from the doc_emb directory, and load_index_from_storage(storage_context) rebuilds the retrieval index from that stored data.
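In practice the storing and loading scripts are usually folded into a single build-or-load step, so the documents are embedded only on the first run. A minimal sketch using the same paths and APIs as above (the PERSIST_DIR constant and the os.path.exists check are illustrative; Settings.llm and Settings.embed_model are assumed to be configured as in the scripts above):

import os
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.core.node_parser import SentenceSplitter

PERSIST_DIR = "doc_emb"  # same persist directory as above

if os.path.exists(PERSIST_DIR):
    # Persisted embeddings exist: load them instead of re-embedding
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run: split, embed and persist the documents
    documents = SimpleDirectoryReader("document").load_data()
    index = VectorStoreIndex.from_documents(documents, transformations=[SentenceSplitter(chunk_size=256)])
    index.storage_context.persist(persist_dir=PERSIST_DIR)
query_engine = index.as_query_engine(similarity_top_k=5)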
II. RAG in Practice 3: How to trace which document chunks are used in retrieval-augmented generation
import logging
import sys
import torch
from llama_index.core import PromptTemplate, Settings, StorageContext, load_index_from_storage, QueryBundle
from llama_index.core.schema import MetadataMode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
# Configure logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# Define the system prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)
# Create the local LLM with llama-index
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name='/mnt/workspace/Qwen1.5-4B-Chat',
    model_name='/mnt/workspace/Qwen1.5-4B-Chat',
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
Settings.llm = llm
# Build the local embedding model with llama-index-embeddings-huggingface
Settings.embed_model = HuggingFaceEmbedding(
    model_name="/mnt/workspace/bge-base-zh-v1.5"
)
# Load the embedding vectors and vector index from the persisted files
storage_context = StorageContext.from_defaults(persist_dir="doc_emb")
index = load_index_from_storage(storage_context)
# Build the query engine
query_engine = index.as_query_engine(similarity_top_k=5)
# Retrieve the top-5 most similar chunks for the query
contexts = query_engine.retrieve(QueryBundle("不耐疲劳,口燥、咽干可能是哪些证候?"))
print('-' * 10 + 'ref' + '-' * 10)
for i, context in enumerate(contexts):
    print('*' * 10 + f'chunk {i} start' + '*' * 10)
    content = context.node.get_content(metadata_mode=MetadataMode.LLM)
    print(content)
    print('*' * 10 + f'chunk {i} end' + '*' * 10)
print('-' * 10 + 'ref' + '-' * 10)
# Query and print the answer
response = query_engine.query("不耐疲劳,口燥、咽干可能是哪些证候?")
print(response)
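The same provenance information is also attached to the response object itself: a query engine response exposes source_nodes, one NodeWithScore per chunk that was passed to the LLM, so a separate retrieve() call is not strictly necessary. A minimal sketch reusing the response from the code above (the score formatting is illustrative):

# Print the chunks (and similarity scores) that backed this particular answer
for i, source_node in enumerate(response.source_nodes):
    print(f"chunk {i}, score={source_node.score}")
    print(source_node.node.get_content(metadata_mode=MetadataMode.LLM))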