LlamaIndex-Qdrant-RAG

爽歪歪和哇哈哈哈

已于 2024-10-07 15:54:54 修改

阅读量383

点赞数 6

文章标签： Qdrant RAG LlamaIndex LLMs

于 2024-08-15 11:00:41 首次发布

本文链接：https://blog.csdn.net/Brilliant_orange/article/details/141216453

版权

基于 LlamaIndex、使用 Qdrant 作为向量数据库的 RAG

Qdrant 官方 API 接口文档：https://api.qdrant.tech/api-reference

配环境：见官网

https://qdrant.tech/documentation/quickstart/

创建requirement.txt


llama-index
llama-index-llms-huggingface
llama-index-embeddings-fastembed
fastembed
Unstructured[md]

llama-index-vector-stores-qdrant
einops
accelerate
sentence-transformers
accelerate==0.29.3
einops==0.7.0
sentence-transformers==2.7.0
transformers==4.39.3
qdrant-client==1.9.0
llama-index==0.10.32
llama-index-agent-openai==0.2.3
llama-index-cli==0.1.12
llama-index-core==0.10.32
llama-index-embeddings-fastembed==0.1.4
llama-index-legacy==0.9.48
llama-index-llms-huggingface==0.1.4
llama-index-vector-stores-qdrant==0.2.8

运行：

pip install -r requirement.txt

安装 FastAPI（下面的接口用到了文件上传和表单，因此还需要 uvicorn 和 python-multipart）：

pip install fastapi uvicorn python-multipart

功能

对集合（collection）以及上传的文件或文本数据进行增删改查。
PS：嵌入模型和 LLM 都可以替换成自己的模型。


from qdrant_client import QdrantClient, models
# from fastapi import FastAPI, Body

from fastapi import FastAPI, File, UploadFile , Form
# pip install llama-index llama-index-vector-stores-qdrant
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.http.models import Distance, VectorParams, PointStruct
# import openai
# from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, Document
from langchain_community.llms.baichuan import BaichuanLLM
from langchain_community.embeddings import BaichuanTextEmbeddings
import os
from qdrant_client import QdrantClient
from qdrant_client.http.models import FilterSelector, Filter, FieldCondition, MatchValue
from fastapi import FastAPI, HTTPException
import time
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    ServiceContext,
    Settings,
    Document
)
app = FastAPI()

# Baichuan API key. It is read from the BAICHUAN_API_KEY environment variable
# so the real secret does not have to live in the source file; when the
# variable is unset the original placeholder value is used.
API_KEY = os.environ.get("BAICHUAN_API_KEY", 'XXXXXXXXXXXXXXXXXXXXXXXXXXXX')
# Vector size used when createCollection creates a Qdrant collection.
# NOTE(review): presumably this has to match the embedding model's output
# size -- confirm against the Baichuan embedding documentation.
DIMENSION = 1024
# Configure the global LlamaIndex Settings object.
Settings.llm = BaichuanLLM(baichuan_api_key=API_KEY)
Settings.chunk_overlap = 100
Settings.chunk_size = 600
Settings.embed_model = BaichuanTextEmbeddings(baichuan_api_key=API_KEY)
Settings.dimension = DIMENSION
# Initialise the Qdrant client shared by every endpoint. The URL can be
# overridden with QDRANT_URL; the default is the original local instance.
client = QdrantClient(url=os.environ.get("QDRANT_URL", "http://localhost:6333"))

# Set the OpenAI API key
# openai.api_key = "YOUR_API_KEY"

def update_knowledge_base(index,input_file):
    """Add the contents of one file to an existing index.

    The file is loaded with ``SimpleDirectoryReader``; the supported file
    formats are listed in the LlamaIndex documentation:
    https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/

    Args:
        index: The index to update; it must provide ``insert(document)``.
        input_file: Path of the file to load.

    Raises:
        ValueError: If the reader returns no documents for ``input_file``.
    """
    new_docs = SimpleDirectoryReader(input_files=[input_file]).load_data()
    if not new_docs:
        # Fail with a clear message instead of an IndexError / silent no-op.
        raise ValueError(f"No documents could be loaded from '{input_file}'.")

    # load_data() returns a list; insert every entry so that a file which is
    # split into several documents is not truncated to its first one.
    for doc in new_docs:
        index.insert(doc)

def query_question(index,question):
    """Run ``question`` against ``index`` and return the answer.

    Args:
        index: Object providing ``as_query_engine()``.
        question: The question to ask.

    Returns:
        Whatever the query engine's ``query`` call returns. The result is
        also printed to stdout.
    """
    answer = index.as_query_engine().query(question)
    print(answer)
    return answer
async def create_doc_from_content_byfileName(content: "bytes | str", metadata: "dict | None" = None) -> Document:
    """Create a Document from raw content plus optional metadata.

    Args:
        content: The document text, either as ``str`` or as UTF-8 encoded
            bytes (e.g. the result of reading an uploaded file).
        metadata: Metadata to attach to the document. Defaults to an empty
            dict created per call.

    Returns:
        The newly created ``Document``.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
    """
    # Build the default per call: a ``{}`` default in the signature would be
    # one shared dict reused by every call.
    extra = {} if metadata is None else metadata
    # Decode explicitly so the Document always receives a ``str``.
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content
    return Document(text=text, extra_info=extra)
async def insert_file_to_knowledge_base_with_filename(collectionName: str, file: UploadFile = File(...)):
    """Insert an uploaded file into a collection without saving it locally.

    The upload's content is turned into a document whose metadata carries
    the upload's name under ``file_name``, and that document is inserted
    into the index backed by the Qdrant collection ``collectionName``.

    Returns:
        A success string, or ``{"error": ...}`` if any step raised.
    """
    try:
        raw = await file.read()
        # Keep the original file name in the metadata.
        document = await create_doc_from_content_byfileName(
            raw, metadata={"file_name": file.filename}
        )
        print(document)

        store = QdrantVectorStore(client=client, collection_name=collectionName)
        VectorStoreIndex.from_vector_store(vector_store=store).insert(document)

        return f"Insert '{file.filename}' successfully."
    except Exception as exc:
        return {"error": str(exc)}


@app.get("/createCollection/")
async def createCollection(collection_name: str):
    """Create a Qdrant collection called ``collection_name``.

    The collection uses vectors of size ``DIMENSION`` with cosine distance.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` if the client
        raised.
    """
    try:
        params = VectorParams(size=DIMENSION, distance=Distance.COSINE)
        client.create_collection(collection_name=collection_name, vectors_config=params)
    except Exception as exc:
        return {"error": f"Failed to create collection: {exc!s}"}
    return {"message": f"Collection {collection_name} created successfully."}


@app.get("/deleteCollection/")
async def deleteCollection(collection_name: str):
    """Delete the Qdrant collection called ``collection_name``.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` if the client
        raised.
    """
    try:
        client.delete_collection(collection_name=collection_name)
    except Exception as exc:
        return {"error": f"Failed to delete collection: {exc!s}"}
    return {"message": f"Collection {collection_name} deleted successfully."}


@app.get("/listCollections/")
async def listCollections():
    """Return the names of all collections known to the Qdrant client.

    Returns:
        ``{"collections": [name, ...]}`` on success, ``{"error": ...}`` if
        listing failed.
    """
    try:
        response = client.get_collections()
        names = [entry.name for entry in response.collections]
        return {"collections": names}
    except Exception as exc:
        return {"error": f"Failed to list collections: {exc!s}"}


@app.post("/query_question_from_collection/")
async def query_question_from_collection(collectionName: str, question: str):
    """Answer a question using the data stored in a collection.

    Args:
        collectionName: Name of the Qdrant collection to query.
        question: The question to ask.

    Returns:
        The result of ``query_question`` for an index built on the
        collection's vector store.
    """
    store = QdrantVectorStore(client=client, collection_name=collectionName)
    collection_index = VectorStoreIndex.from_vector_store(vector_store=store)
    return query_question(collection_index, question)



@app.post("/insert_file_to_KnowledgeBase_wo_save/")
async def insert_file_to_KnowledgeBase_wo_save(collectionName: str, file: UploadFile = File(None), text: str = Form(None), textFileName: str = Form(None)):
    """Insert an uploaded file or a piece of text without writing to disk.

    An uploaded file takes precedence and is handed to
    ``insert_file_to_knowledge_base_with_filename``. Otherwise, when both
    ``text`` and ``textFileName`` are given, the text is inserted as a
    document whose ``file_name`` metadata is ``"<textFileName>.txt"``.

    Returns:
        The helper's result for uploads, ``{"message": ...}`` for text, or
        ``{"error": ...}`` when nothing was provided or a step raised.
    """
    try:
        # Uploaded file: delegate to the shared helper.
        if file is not None:
            return await insert_file_to_knowledge_base_with_filename(collectionName, file)

        if text is None or textFileName is None:
            return {"error": "No file or text provided."}

        # Plain text: index it under a synthetic "<name>.txt" file name.
        store = QdrantVectorStore(client=client, collection_name=collectionName)
        collection_index = VectorStoreIndex.from_vector_store(vector_store=store)
        document = await create_doc_from_content_byfileName(
            text, metadata={"file_name": f"{textFileName}.txt"}
        )
        collection_index.insert(document)
        return {"message": f"Insert '{textFileName}' successfully."}
    except Exception as exc:
        return {"error": str(exc)}
    

def _safe_path_component(name):
    """Return the last path component of ``name``; reject empty or dot names."""
    component = os.path.basename((name or "").replace("\\", "/"))
    if component in ("", ".", ".."):
        raise ValueError(f"Invalid file or collection name: {name!r}")
    return component


@app.post("/insert_data_to_KnowledgeBase/")
async def insert_data_to_KnowledgeBase(collectionName: str, file: UploadFile = File(None), text: str = Form(None), textFileName: str = Form(None)):
    """Save an uploaded file or a piece of text locally, then index it.

    An uploaded file is written to ``./<collectionName>/<file name>``.
    Otherwise, when both ``text`` and ``textFileName`` are given, the text is
    written to ``./<collectionName>/<textFileName>.txt``. The saved file is
    then loaded into the index backed by the collection.

    Only the final path component of each user-supplied name is used for the
    local path, so a request cannot write outside the collection directory.

    Returns:
        ``{"message": ...}`` on success, ``{"error": ...}`` when neither a
        file nor text was provided or when any step raised.
    """
    try:
        # Without this guard save_path would be unbound further down.
        if file is None and (text is None or textFileName is None):
            return {"error": "No file or text provided."}

        target_dir = f"./{_safe_path_component(collectionName)}"
        if file is not None:
            # Uploaded file: store the raw bytes.
            save_path = f"{target_dir}/{_safe_path_component(file.filename)}"
            os.makedirs(target_dir, exist_ok=True)  # create the directory if needed
            with open(save_path, mode='wb') as f:
                f.write(await file.read())
        else:
            # Plain text: store it as a UTF-8 .txt file.
            save_path = f"{target_dir}/{_safe_path_component(textFileName)}.txt"
            os.makedirs(target_dir, exist_ok=True)  # create the directory if needed
            with open(save_path, mode='w', encoding='utf-8') as f:
                f.write(text)

        vector_store = QdrantVectorStore(client=client, collection_name=collectionName)
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        update_knowledge_base(index, save_path)
        return {"message": f"Insert '{file.filename if file is not None else textFileName}' successfully."}
    except Exception as e:
        return {"error": str(e)}


@app.post("/delete_vectors_by_fileName/")
async def delete_vectors_by_fileName(collection_name: str, file_name: str):
    """Delete the vectors of one file and its local copy, if there is one.

    The local file ``./<collection_name>/<file_name>`` is removed when it
    exists; otherwise ``./<collection_name>/<file_name>.txt`` is tried. The
    vectors deleted are those whose ``file_name`` payload equals the name of
    the local file that was removed. When no local file is found, the name
    is used as given if it contains a ".", else with ".txt" appended.

    Args:
        collection_name: Name of the Qdrant collection.
        file_name: File name whose vectors should be deleted.

    Returns:
        ``{"message": ...}`` describing what was deleted, or
        ``{"error": ...}`` if any step raised.
    """
    try:
        # Both values end up in a path handed to os.remove, so the local
        # file is only touched when neither can escape the collection
        # directory. Vector deletion is unaffected by this check.
        names_are_plain = all(
            part not in ("", ".", "..") and "/" not in part and "\\" not in part
            for part in (collection_name, file_name)
        )
        save_path = f"./{collection_name}/{file_name}"
        txt_save_path = f"./{collection_name}/{file_name}.txt"

        if names_are_plain and os.path.exists(save_path):
            os.remove(save_path)
            target_name = file_name
            message = {"message": f"Deleted local file and  vectors of file_name '{file_name}' from '{collection_name}' ."}
        elif names_are_plain and os.path.exists(txt_save_path):
            # Fall back to the .txt file of the same name.
            os.remove(txt_save_path)
            target_name = f"{file_name}.txt"
            message =  {"message": f"Failed to find '{file_name}' in '{collection_name}', but deleted local file and the vectors of '{file_name}.txt'  instead."}
        else:
            # No local copy: guess the stored name from the extension.
            target_name = file_name if "." in file_name else f"{file_name}.txt"
            message =  {"message": f"Deleted vectors of file_name '{file_name}' from '{collection_name}' ."}

        # Delete every point whose payload file_name matches the target, so
        # the vectors removed always agree with the returned message.
        client.delete(
            collection_name=collection_name,
            points_selector=FilterSelector(
                filter=Filter(
                    must=[
                        FieldCondition(
                            key="file_name",
                            match=MatchValue(value=target_name),
                        ),
                    ],
                )
            ),
        )
        return message
    except Exception as e:
        return {"error": str(e)}





# Run the FastAPI application with uvicorn when executed as a script.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)