题意:当使用Hugging Face的本地嵌入模型时,为什么llama-index仍然需要一个OpenAI的密钥(key)?
问题背景:
I am creating a very simple question and answer app based on documents using llama-index. Previously, I had it working with OpenAI. Now I want to try using no external APIs so I'm trying the Hugging Face example.
我正在使用 llama-index 创建一个基于文档的非常简单的问答应用。之前,我使用 OpenAI 来实现这个功能。现在我想尝试不使用任何外部 API,所以我正在尝试 Hugging Face 的示例。
Note that for a completely private experience, also setup a local embedding model.
请注意,为了获得完全私有的体验,也请设置一个本地嵌入模型。
Here is my code: 这是我的代码
from pathlib import Path
import gradio as gr
import sys
import logging
import os
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt
# Mirror all log output (DEBUG and up) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# NOTE(review): basicConfig(stream=sys.stdout) already installs a stdout
# handler, so this extra handler likely duplicates every log line — confirm.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, load_index_from_storage, StorageContext
# Where the persisted vector index is written, and where source docs are read from.
storage_path = "storage/"
docs_path="docs"
def construct_index(directory_path):
    """Build a vector index over the documents in ``directory_path``.

    Generation uses a local StableLM HuggingFace model; the finished index
    is persisted to ``storage_path`` and returned.
    """
    # Leftover tuning knobs from the earlier OpenAI-based version (unused here).
    max_input_size = 4096
    num_outputs = 512
    #max_chunk_overlap = 20
    chunk_overlap_ratio = 0.1
    chunk_size_limit = 600
    #prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio, chunk_size_limit=chunk_size_limit)

    # StableLM-tuned chat preamble, passed verbatim to the model.
    system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""
    # Wraps llama-index's internal default prompts in the StableLM chat format.
    wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

    stablelm = HuggingFaceLLM(
        context_window=4096,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.7, "do_sample": False},
        system_prompt=system_prompt,
        query_wrapper_prompt=wrapper_prompt,
        tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
        model_name="StabilityAI/stablelm-tuned-alpha-3b",
        device_map="auto",
        stopping_ids=[50278, 50279, 50277, 1, 0],
        tokenizer_kwargs={"max_length": 4096},
        # uncomment this if using CUDA to reduce memory usage
        # model_kwargs={"torch_dtype": torch.float16}
    )
    #llm=ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=num_outputs)
    #llm_predictor = LLMPredictor(llm=llm)

    # NOTE(review): no embed_model is configured here, so llama-index appears to
    # fall back to OpenAI embeddings — consistent with the API-key error reported
    # later in this post.
    context = ServiceContext.from_defaults(chunk_size=1024, llm=stablelm)
    loaded_docs = SimpleDirectoryReader(directory_path).load_data()
    vector_index = VectorStoreIndex.from_documents(loaded_docs, service_context=context)
    #index = VectorStoreIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    vector_index.storage_context.persist(persist_dir=storage_path)
    return vector_index
def chatbot(input_text):
    """Answer ``input_text`` from the persisted index.

    Returns a tuple of (answer text, list of resolved source-file paths).
    """
    # Reload the index from disk on every request.
    index = load_index_from_storage(StorageContext.from_defaults(persist_dir=storage_path))
    #index = GPTVectorStoreIndex.load_from_disk('index.json')
    #query_engine = index.as_query_engine(response_synthesizer=response_synthesizer);
    engine = index.as_query_engine(streaming=True)
    response = engine.query(input_text)
    print(response.source_nodes)

    relevant_files = []
    for scored_node in response.source_nodes:
        print(scored_node)
        print(scored_node.node)
        print(scored_node.node.metadata)
        print(scored_node.node.metadata['file_name'])
        fname = scored_node.node.metadata['file_name']
        print(fname)
        # Resolve the full file path for the downloading
        resolved = Path(docs_path, fname).resolve()
        # Only list each source file once.
        if resolved not in relevant_files:
            relevant_files.append(resolved)

    print(relevant_files)
    return response.get_response(), relevant_files
# Gradio UI: one text box in; the model answer plus relevant source files out.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.components.Textbox(lines=7, label="Enter your text"),
    outputs=[
        gr.components.Textbox(label="Response"),
        gr.components.File(label="Relevant Files"),
    ],
    title="Custom-trained AI Chatbot",
    allow_flagging="never",
)

# Build (and persist) the index before serving the UI.
index = construct_index(docs_path)
iface.launch(share=False)
Regardless, the code errors out saying: 无论如何,代码出现了错误提示:
ValueError: No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys
Am I not understanding how to set up a local model?
我是不是没有理解如何设置本地模型?
问题解决:
Turns out I had to set the embed_model to "local" on the ServiceContext.
原来我需要将ServiceContext中的embed_model设置为"local"。
ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model="local")
Also, when I was loading the vector index from disk I wasn't setting the llm predictor again, which caused a secondary issue. So I decided to make the vector index a global variable. Here is my final code that works.
另外,当我从磁盘加载向量索引时,我没有重新设置llm predictor,这导致了另一个问题。所以我决定将向量索引设置为一个全局变量。以下是我最终能够工作的代码。
from pathlib import Path
import gradio as gr
import sys
import logging
import os
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt
# Mirror all log output (DEBUG and up) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# NOTE(review): basicConfig(stream=sys.stdout) already installs a stdout
# handler, so this extra handler likely duplicates every log line — confirm.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, load_index_from_storage, StorageContext
# Where the persisted index lives and where the source documents are read from.
storage_path = "storage"
docs_path="docs"
print(storage_path)
# Leftover tuning knobs from the earlier OpenAI-based version (unused below).
max_input_size = 4096
num_outputs = 512
#max_chunk_overlap = 20
chunk_overlap_ratio = 0.1
chunk_size_limit = 600
# StableLM-tuned chat preamble, passed verbatim to the model.
system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""
# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
# Local HuggingFace LLM so generation needs no external API.
llm = HuggingFaceLLM(
context_window=4096,
max_new_tokens=256,
generate_kwargs={"temperature": 0.7, "do_sample": False},
system_prompt=system_prompt,
query_wrapper_prompt=query_wrapper_prompt,
tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
model_name="StabilityAI/stablelm-tuned-alpha-3b",
device_map="auto",
stopping_ids=[50278, 50279, 50277, 1, 0],
tokenizer_kwargs={"max_length": 4096},
# uncomment this if using CUDA to reduce memory usage
# model_kwargs={"torch_dtype": torch.float16}
)
# embed_model="local" is the fix described above: embeddings are computed
# locally instead of falling back to OpenAI (which required an API key).
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model="local")
documents = SimpleDirectoryReader(docs_path).load_data()
# Build the index once at startup; chatbot() reuses this module-level global.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
def chatbot(input_text):
    """Query the module-level ``index``.

    Returns a tuple of (answer text, list of resolved source-file paths).
    """
    engine = index.as_query_engine()
    response = engine.query(input_text)
    print(response.source_nodes)

    relevant_files = []
    for scored_node in response.source_nodes:
        print(scored_node)
        print(scored_node.node)
        print(scored_node.node.metadata)
        print(scored_node.node.metadata['file_name'])
        fname = scored_node.node.metadata['file_name']
        print(fname)
        # Resolve the full file path for the downloading
        resolved = Path(docs_path, fname).resolve()
        # Only list each source file once.
        if resolved not in relevant_files:
            relevant_files.append(resolved)

    print(relevant_files)
    return response.response, relevant_files
# Gradio UI: one text box in; the model answer plus relevant source files out.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.components.Textbox(lines=7, label="Enter your text"),
    outputs=[
        gr.components.Textbox(label="Response"),
        gr.components.File(label="Relevant Files"),
    ],
    title="Custom-trained AI Chatbot",
    allow_flagging="never",
)
iface.launch(share=False)