基于llamaindex,用Qdrant做数据库的RAG
Qdrant 官网api 接口:https://api.qdrant.tech/api-reference
配环境:见官网
https://qdrant.tech/documentation/quickstart/
创建requirement.txt
llama-index
llama-index-llms-huggingface
llama-index-embeddings-fastembed
fastembed
Unstructured[md]
llama-index-vector-stores-qdrant
einops
accelerate
sentence-transformers
accelerate==0.29.3
einops==0.7.0
sentence-transformers==2.7.0
transformers==4.39.3
qdrant-client==1.9.0
llama-index==0.10.32
llama-index-agent-openai==0.2.3
llama-index-cli==0.1.12
llama-index-core==0.10.32
llama-index-embeddings-fastembed==0.1.4
llama-index-legacy==0.9.48
llama-index-llms-huggingface==0.1.4
llama-index-vector-stores-qdrant==0.2.8
运行:
pip install -r requirement.txt
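安装 FastAPI 及运行服务所需的依赖(文件上传和表单参数需要 python-multipart,代码中的百川模型来自 langchain-community):
pip install fastapi uvicorn python-multipart langchain-community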
功能
对集合数据库、上传文件或者数据的增删改查
ps:嵌入模型或者llms可以替换成自己的模型
from qdrant_client import QdrantClient, models
# from fastapi import FastAPI, Body
from fastapi import FastAPI, File, UploadFile , Form
# pip install llama-index llama-index-vector-stores-qdrant
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.http.models import Distance, VectorParams, PointStruct
# import openai
# from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, Document
from langchain_community.llms.baichuan import BaichuanLLM
from langchain_community.embeddings import BaichuanTextEmbeddings
import os
from qdrant_client import QdrantClient
from qdrant_client.http.models import FilterSelector, Filter, FieldCondition, MatchValue
from fastapi import FastAPI, HTTPException
import time
from llama_index.core import (
SimpleDirectoryReader,
VectorStoreIndex,
StorageContext,
ServiceContext,
Settings,
Document
)
# FastAPI application object; every route in this file registers on it.
app = FastAPI()

# Baichuan API key. The BAICHUAN_API_KEY environment variable takes precedence
# so a real key never has to be committed to the source; the placeholder below
# is kept as the fallback.
API_KEY = os.environ.get("BAICHUAN_API_KEY", 'XXXXXXXXXXXXXXXXXXXXXXXXXXXX')

# Vector size used when creating Qdrant collections.
# NOTE(review): presumably this must equal the embedding model's output
# size -- confirm against the Baichuan embedding documentation.
DIMENSION = 1024

# Global llama-index settings: LLM, chunking parameters and embedding model.
Settings.llm = BaichuanLLM(baichuan_api_key=API_KEY)
Settings.chunk_overlap = 100
Settings.chunk_size = 600
Settings.embed_model = BaichuanTextEmbeddings(baichuan_api_key=API_KEY)
Settings.dimension = DIMENSION

# Shared Qdrant client. QDRANT_URL overrides the default local instance.
client = QdrantClient(url=os.environ.get("QDRANT_URL", "http://localhost:6333"))
def update_knowledge_base(index, input_file):
    """Load a file from disk and insert everything it yields into the index.

    Args:
        index: Index to update; must expose ``insert(document)``.
        input_file: Path of the file to load.

    Raises:
        ValueError: If the reader returns no documents for ``input_file``.

    ``SimpleDirectoryReader.load_data`` returns a list, and one file can yield
    several documents (for example one per page of a PDF), so every returned
    document is inserted rather than only the first one.

    Supported file formats are listed in the SimpleDirectoryReader docs:
    https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/
    """
    new_docs = SimpleDirectoryReader(input_files=[input_file]).load_data()
    if not new_docs:
        # Fail loudly so the caller does not report a successful insert
        # when nothing was actually added.
        raise ValueError(f"No content could be loaded from '{input_file}'.")
    for doc in new_docs:
        index.insert(doc)
def query_question(index, question):
    """Ask ``question`` against ``index`` and return the engine's answer.

    Args:
        index: Object exposing ``as_query_engine()``.
        question: The query text.

    Returns:
        Whatever the query engine's ``query`` call returns. The answer is
        also printed to stdout.
    """
    answer = index.as_query_engine().query(question)
    print(answer)
    return answer
async def create_doc_from_content_byfileName(content: "bytes | str", metadata: "dict | None" = None) -> Document:
    """Create a ``Document`` from raw content with optional metadata.

    Args:
        content: The document text, either already a ``str`` or UTF-8
            encoded bytes (e.g. the result of ``UploadFile.read()``).
            The content is only decoded, never parsed, so binary formats
            are not interpreted here.
        metadata: Extra information attached to the document. A fresh empty
            dict is used when omitted, so documents never share one default
            dict.

    Returns:
        The newly built ``Document``.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
    """
    # Decode explicitly instead of passing raw bytes as the document text.
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content
    doc_metadata = {} if metadata is None else metadata
    return Document(text=text, extra_info=doc_metadata)
async def insert_file_to_knowledge_base_with_filename(collectionName: str, file: UploadFile = File(...)):
    """Insert an uploaded file's content into a collection, tagged with its name.

    The upload is read, wrapped into a document whose metadata carries
    ``file_name``, printed, and inserted into the index built on the
    ``collectionName`` Qdrant collection.

    Returns:
        A success string, or ``{"error": <message>}`` if anything raised.
    """
    try:
        raw_content = await file.read()
        # Keep the upload's filename in the metadata so it is stored
        # alongside the vectors.
        document = await create_doc_from_content_byfileName(
            raw_content, metadata={"file_name": file.filename}
        )
        print(document)
        store = QdrantVectorStore(client=client, collection_name=collectionName)
        VectorStoreIndex.from_vector_store(vector_store=store).insert(document)
        return f"Insert '{file.filename}' successfully."
    except Exception as exc:
        return {"error": str(exc)}
@app.get("/createCollection/")
async def createCollection(collection_name: str):
    """Create a Qdrant collection called ``collection_name``.

    The collection uses vectors of size ``DIMENSION`` with cosine distance.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` on failure.
    """
    try:
        vector_params = models.VectorParams(size=DIMENSION, distance=models.Distance.COSINE)
        client.create_collection(collection_name=collection_name, vectors_config=vector_params)
    except Exception as exc:
        return {"error": f"Failed to create collection: {str(exc)}"}
    else:
        return {"message": f"Collection {collection_name} created successfully."}
@app.get("/deleteCollection/")
async def deleteCollection(collection_name: str):
    """Delete the Qdrant collection called ``collection_name``.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` on failure.
    """
    try:
        client.delete_collection(collection_name=collection_name)
    except Exception as exc:
        return {"error": f"Failed to delete collection: {str(exc)}"}
    else:
        return {"message": f"Collection {collection_name} deleted successfully."}
@app.get("/listCollections/")
async def listCollections():
    """Return the names of all collections known to the Qdrant client.

    Returns:
        ``{"collections": [<name>, ...]}`` on success,
        ``{"error": ...}`` on failure.
    """
    try:
        listing = client.get_collections()
        names = [entry.name for entry in listing.collections]
        return {"collections": names}
    except Exception as exc:
        return {"error": f"Failed to list collections: {str(exc)}"}
@app.post("/query_question_from_collection/")
async def query_question_from_collection(collectionName: str, question: str):
    """Answer ``question`` using the vectors stored in a collection.

    Args:
        collectionName: Name of the Qdrant collection to query.
        question: The question to ask.

    Returns:
        The result of ``query_question`` on success. On failure it returns
        ``{"error": <message>}``, matching the other endpoints in this file,
        instead of letting the exception propagate.
    """
    try:
        vector_store = QdrantVectorStore(client=client, collection_name=collectionName)
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        return query_question(index, question)
    except Exception as e:
        return {"error": str(e)}
@app.post("/insert_file_to_KnowledgeBase_wo_save/")
async def insert_file_to_KnowledgeBase_wo_save(collectionName: str, file: UploadFile = File(None), text: str = Form(None), textFileName: str = Form(None)):
    """Insert an uploaded file or a text snippet without writing to local disk.

    An uploaded ``file`` takes priority and is delegated to
    ``insert_file_to_knowledge_base_with_filename``. Otherwise, when both
    ``text`` and ``textFileName`` are given, the text is inserted with
    ``file_name`` metadata set to ``"{textFileName}.txt"``.

    Returns:
        The delegate's result for a file, ``{"message": ...}`` for text,
        or ``{"error": ...}`` when nothing usable was provided or an
        exception occurred.
    """
    try:
        # Uploaded file: hand over to the shared helper.
        if file is not None:
            return await insert_file_to_knowledge_base_with_filename(collectionName, file)

        # Text input needs both the text and a name to label it with.
        if text is None or textFileName is None:
            return {"error": "No file or text provided."}

        store = QdrantVectorStore(client=client, collection_name=collectionName)
        target_index = VectorStoreIndex.from_vector_store(vector_store=store)
        document = await create_doc_from_content_byfileName(
            text, metadata={"file_name": f"{textFileName}.txt"}
        )
        target_index.insert(document)
        # ``file`` is always None on this path, so the name is textFileName.
        return {"message": f"Insert '{textFileName}' successfully."}
    except Exception as exc:
        return {"error": str(exc)}
def _safe_path_component(name: str) -> str:
    """Return the final path component of ``name``; raise ValueError if unusable."""
    # Normalise Windows separators so "..\\x" is handled like "../x".
    component = os.path.basename(str(name).replace("\\", "/"))
    if component in ("", ".", ".."):
        raise ValueError(f"Invalid name: {name!r}")
    return component


@app.post("/insert_data_to_KnowledgeBase/")
async def insert_data_to_KnowledgeBase(collectionName: str, file: UploadFile = File(None), text: str = Form(None), textFileName: str = Form(None)):
    """Save an uploaded file or text locally, then insert it into a collection.

    The data is written under ``./{collectionName}/``: an upload keeps its
    file name, text is written to ``{textFileName}.txt``. The saved file is
    then loaded and inserted through ``update_knowledge_base``.

    Names come from the client, so they are validated before touching the
    file system: ``collectionName`` must be a single plain path component,
    and only the final component of the file name is used. This keeps the
    write inside ``./{collectionName}/``.

    Args:
        collectionName: Target Qdrant collection and local directory name.
        file: Optional uploaded file; takes priority over ``text``.
        text: Optional raw text to store.
        textFileName: Base name (without ``.txt``) for the text file.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` when neither a
        file nor text was provided, a name is invalid, or an exception
        occurred.
    """
    try:
        if _safe_path_component(collectionName) != collectionName:
            raise ValueError(f"Invalid collection name: {collectionName!r}")

        if file is not None:
            # Uploaded file: store the raw bytes.
            save_path = f"./{collectionName}/{_safe_path_component(file.filename)}"
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            with open(save_path, mode='wb') as f:
                f.write(await file.read())
        elif text is not None and textFileName is not None:
            # Text input: store it as a UTF-8 .txt file.
            save_path = f"./{collectionName}/{_safe_path_component(textFileName)}.txt"
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            with open(save_path, mode='w', encoding='utf-8') as f:
                f.write(text)
        else:
            # Without this branch ``save_path`` would be unbound below and
            # the client would only see a cryptic NameError message.
            return {"error": "No file or text provided."}

        vector_store = QdrantVectorStore(client=client, collection_name=collectionName)
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        update_knowledge_base(index, save_path)
        return {"message": f"Insert '{file.filename if file is not None else textFileName}' successfully."}
    except Exception as e:
        return {"error": str(e)}
@app.post("/delete_vectors_by_fileName/")
async def delete_vectors_by_fileName(collection_name: str, file_name: str):
    """Delete the vectors stored for a file name, plus any local copy.

    The local copy is looked up at ``./{collection_name}/{file_name}`` and,
    failing that, at ``./{collection_name}/{file_name}.txt``. Whichever one
    is found and removed also decides which ``file_name`` value the vector
    filter uses, so the local file and the vectors always refer to the same
    name. When no local copy exists, the filter uses ``file_name`` if it
    contains a dot and ``"{file_name}.txt"`` otherwise.

    Local files are only touched when both names are plain single path
    components; otherwise only the vectors are deleted. This prevents a
    crafted name from removing files outside the collection directory.

    Args:
        collection_name: Qdrant collection (and local directory) name.
        file_name: File name whose vectors should be removed.

    Returns:
        ``{"message": ...}`` describing what was deleted, or
        ``{"error": ...}`` if an exception occurred.
    """
    try:
        message = {"message": f"Deleted vectors of file_name '{file_name}' from '{collection_name}' ."}
        # Fallback filter value when no local copy pins down the stored name.
        target_name = file_name if "." in file_name else f"{file_name}.txt"

        names_are_plain = all(
            part not in ("", ".", "..") and "/" not in part and "\\" not in part
            for part in (collection_name, file_name)
        )
        if names_are_plain:
            save_path = f"./{collection_name}/{file_name}"
            txt_save_path = f"./{collection_name}/{file_name}.txt"
            if os.path.exists(save_path):
                os.remove(save_path)
                target_name = file_name
                message = {"message": f"Deleted local file and vectors of file_name '{file_name}' from '{collection_name}' ."}
            elif os.path.exists(txt_save_path):
                # No exact match on disk: fall back to the same-named .txt.
                os.remove(txt_save_path)
                target_name = f"{file_name}.txt"
                message = {"message": f"Failed to find '{file_name}' in '{collection_name}', but deleted local file and the vectors of '{file_name}.txt' instead."}

        # Remove every point whose payload ``file_name`` equals the target.
        client.delete(
            collection_name=collection_name,
            points_selector=FilterSelector(
                filter=Filter(
                    must=[
                        FieldCondition(
                            key="file_name",
                            match=MatchValue(value=target_name),
                        ),
                    ],
                )
            ),
        )
        return message
    except Exception as e:
        return {"error": str(e)}
# Start the FastAPI application with uvicorn when run as a script.
if __name__ == "__main__":
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)