Qdrant向量数据库的使用
1 介绍
Qdrant(读作:quadrant)是一个向量相似性搜索引擎和向量数据库。它是一个生产就绪的服务,提供了方便的API来存储、搜索和管理点(point,即带有附加有效负载payload的向量),并专门针对扩展过滤提供支持。这使得它适用于各种基于神经网络或语义的匹配、分面搜索(faceted search)以及其他应用。
Qdrant要比Weaviate向量库好用。
# Github地址
https://github.com/qdrant/qdrant
# 官网
https://qdrant.tech/
# 开发文档
https://qdrant.tech/documentation/
# 详细的实例参考
https://qdrant.tech/documentation/tutorials/search-beginners/
2 使用Docker安装
安装Qdrant
docker run -itd --name=qdrant \
-p 6333:6333 \
-p 6334:6334 \
-e "QDRANT__SERVICE__API_KEY=123456" \
-e "QDRANT__SERVICE__JWT_RBAC=true" \
-v /home/qdrant_storage:/qdrant/storage:z \
qdrant/qdrant
可视化界面和接口
REST接口: localhost:6333
Web UI: localhost:6333/dashboard
GRPC接口: localhost:6334
REST接口
Web UI
需要输入API密钥(即创建容器时通过 QDRANT__SERVICE__API_KEY 设置的值):123456
操作台
安装Python客户端
pip install qdrant-client[fastembed] -i https://pypi.tuna.tsinghua.edu.cn/simple
3 Python使用Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from qdrant_client.models import PointStruct
from qdrant_client.models import Filter, FieldCondition, MatchValue
# Create the shared client used by every function below.
# Port 6333 is the REST endpoint and 6334 the gRPC endpoint; the API key
# must match the QDRANT__SERVICE__API_KEY value the container was started with.
ip_addr = "192.168.108.200"
client = QdrantClient(
    url=f"http://{ip_addr}",
    port=6333,
    grpc_port=6334,
    api_key="123456",
)
def create_collection():
    """Create the ``test_collection`` collection if it does not exist yet.

    The collection stores 4-dimensional vectors compared with the dot
    product. Calling this function again after the collection has been
    created is a no-op instead of an error, so the script can be re-run
    without commenting the call out.
    """
    collection_name = "test_collection"

    # Creating a collection that already exists is rejected by the server,
    # so check first and leave the existing collection (and its data) alone.
    if client.collection_exists(collection_name=collection_name):
        print(f"Collection '{collection_name}' already exists; skipping creation.")
        return

    client.create_collection(
        # Name of the collection.
        collection_name=collection_name,
        # Shape of the vectors stored in the collection:
        #   size     - dimensionality of each vector
        #   distance - similarity metric; the options are COSINE,
        #              EUCLID (Euclidean), DOT (dot product) and
        #              MANHATTAN.
        vectors_config=VectorParams(size=4, distance=Distance.DOT),
    )
def add_data():
    """Upsert six sample city points into ``test_collection`` and print the result."""
    # (point id, 4-dimensional vector, city name) for each sample record.
    samples = [
        (1, [0.05, 0.61, 0.76, 0.74], "Berlin"),
        (2, [0.19, 0.81, 0.75, 0.11], "London"),
        (3, [0.36, 0.55, 0.47, 0.94], "Moscow"),
        (4, [0.18, 0.01, 0.85, 0.80], "New York"),
        (5, [0.24, 0.18, 0.22, 0.44], "Beijing"),
        (6, [0.35, 0.08, 0.11, 0.44], "Mumbai"),
    ]
    city_points = [
        PointStruct(id=point_id, vector=vector, payload={"city": city})
        for point_id, vector, city in samples
    ]

    # wait=True is passed so that, presumably, the call returns only after
    # the write has been applied (the recorded status below is "completed").
    upsert_status = client.upsert(
        collection_name="test_collection",
        wait=True,
        points=city_points,
    )

    # Sample output recorded by the original author:
    # operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>
    print(upsert_status)
def query_data():
    """Search ``test_collection`` for the 3 best matches of a fixed query vector and print them."""
    probe_vector = [0.2, 0.1, 0.9, 0.7]

    hits = client.search(
        collection_name="test_collection",  # collection to search
        query_vector=probe_vector,          # vector to compare against
        limit=3,                            # maximum number of results
    )

    # Sample output recorded by the original author:
    # [ScoredPoint(id=4, version=0, score=1.362, payload={'city': 'New York'}, vector=None, shard_key=None),
    #  ScoredPoint(id=1, version=0, score=1.273, payload={'city': 'Berlin'}, vector=None, shard_key=None),
    #  ScoredPoint(id=3, version=0, score=1.208, payload={'city': 'Moscow'}, vector=None, shard_key=None)]
    print(hits)
def filter_data():
    """Run the same vector search as ``query_data`` but restricted to points whose city is London."""
    # Only points whose "city" payload field equals "London" may match.
    london_only = FieldCondition(key="city", match=MatchValue(value="London"))
    city_filter = Filter(must=[london_only])

    hits = client.search(
        collection_name="test_collection",
        query_vector=[0.2, 0.1, 0.9, 0.7],
        query_filter=city_filter,
        with_payload=True,
        limit=3,
    )

    # Sample output recorded by the original author:
    # [ScoredPoint(id=2, version=0, score=0.871, payload={'city': 'London'}, vector=None, shard_key=None)]
    print(hits)
if __name__ == "__main__":
    # Steps 1 and 2 are one-time setup and are left disabled here;
    # uncomment them for the first run.
    # 1. Create the collection
    # create_collection()
    # 2. Insert the sample data
    # add_data()

    # 3. Plain vector search, then 4. vector search with a payload filter.
    for run_step in (query_data, filter_data):
        run_step()