一 背景:选取常用的向量数据库做些研究,参考
二 今天先来看看 Qdrant
1. 基本介绍(Qdrant - Vector Database)
Qdrant (read: quadrant) is a vector similarity search engine and vector database. It provides a production-ready service with a convenient API to store, search, and manage points—vectors with an additional payload. Qdrant is tailored to extended filtering support. It makes it useful for all sorts of neural-network or semantic-based matching, faceted search, and other applications.
Qdrant is written in Rust 🦀, which makes it fast and reliable even under high load. See benchmarks.
With Qdrant, embeddings or neural network encoders can be turned into full-fledged applications for matching, searching, recommending, and much more!
Qdrant is also available as a fully managed Qdrant Cloud ⛅ including a free tier.
Quick Start • Client Libraries • Demo Projects • Integrations • Contact
2. 本地Docker部署 (Quickstart - Qdrant)
docker pull qdrant/qdrant
docker run -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage:z \
qdrant/qdrant
docker run -d -p 6333:6333 -p 6334:6334 -v D:/petspace/qdrant/qdrant_storage:/qdrant/storage:z qdrant/qdrant
REST API: localhost:6333
Web UI: localhost:6333/dashboard
GRPC API: localhost:6334
3. 购买云服(免费版可试用)
云服地址:Vector Search Database | Qdrant Cloud
官方提供的计算机器配置建议:Vector Search Database | Qdrant Cloud
4. demo:
1)本地docker启动 或 连接云服
2)使用python脚本,利用官方python驱动初始化数据库,并写入一批向量,具体见附件
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
# Connect to the locally running Docker instance over the REST port.
client = QdrantClient("localhost", port=6333)
# Alternative: connect to a Qdrant Cloud cluster (URL and API key below are placeholders).
#client = QdrantClient("https://xxxxxxxxx.us-east4-0.gcp.cloud.qdrant.io", port=6333, api_key="U-i_fkeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeyk")

# One-off collection setup; uncomment on the first run.
# client.create_collection(
#     collection_name="test_face",
#     vectors_config=VectorParams(size=6, distance=Distance.DOT),
# )

# Sample points: a 6-dimensional vector plus a payload describing the kid.
faces = [
    PointStruct(id=275, payload={"kidId": 275, "kidName": "马XXX"}, vector=[0.067767, 0, 0.041501254, 0, 0, 0.022919167]),
    # ... (remaining points omitted) ...
    PointStruct(id=276, payload={"kidId": 276, "kidName": "马XXX"}, vector=[0.06975273, 0, 0.026070384, 0.020760203, 0.0014666885, 0.021621302]),
]

# Upsert one point per request: writing the whole batch in a single call timed out.
for face in faces:
    operation_info = client.upsert(
        collection_name="test_face",
        wait=True,
        points=[face],
    )
    print(operation_info)
3)试用python驱动
# A simple query: fetch the 3 points most similar to the probe vector.
query_vector = [0.06972373, 0, 0.026070384, 0.0207623203, 0.001324885, 0.021621302]
search_result = client.search(
    collection_name="test_face",
    query_vector=query_vector,
    limit=3,
)
print(search_result)
4)试用.net驱动
a 微软的驱动:Microsoft.SemanticKernel.Connectors.Memory.Qdrant
问题:截至目前,微软的驱动实现似乎还不完整:只能基于全库查找,标称的 tag 查找没有完整实现,按 payload 查找也尚未支持。
using Microsoft.SemanticKernel.Connectors.Memory.Qdrant;
.....
_clientNet = new QdrantVectorDbClient("http://localhost:6333", 6);
.....
/// <summary>
/// End-to-end smoke test of the Microsoft Semantic Kernel Qdrant connector:
/// ensures the collection exists, upserts one record, searches for it with and
/// without tags, and finally deletes the record again.
/// </summary>
/// <returns><c>true</c> when every call completes without throwing.</returns>
public async Task<bool> TestQdrantAsyncByMS()
{
    await TestCreateCollectionAsyncByMS();
    var record = BuildTestVectorByMS();

    // TODO: the driver does not seem to write the tags.
    // Create the vector.
    await _clientNet.UpsertVectorsAsync(TEST_FACE_COLLECTION_NAME, new List<QdrantVectorRecord> { record });

    try
    {
        // Query the nearest vectors; the record just written should score above 0.9999.
        var resultList = await SearchNearestAsyncByMS(record.Embedding);
        var nearest = resultList.FirstOrDefault(x => x.Item2 > 0.9999);

        // TODO: the driver offers no payload filtering.
        // Query the nearest vectors again, this time restricted by tags.
        var resultListTags = await SearchNearestAsyncByMS(record.Embedding, record.Tags);
    }
    finally
    {
        // Always remove the test record, even when one of the searches throws,
        // so repeated runs do not accumulate stale points in the collection.
        await _clientNet.DeleteVectorsByIdAsync(TEST_FACE_COLLECTION_NAME, new List<string> { record.PointId });
    }

    return true;
}
/// <summary>
/// Creates the test collection through the Microsoft connector unless it already exists.
/// </summary>
private async Task TestCreateCollectionAsyncByMS()
{
    // Nothing to do when the collection is already there.
    if (await _clientNet.DoesCollectionExistAsync(TEST_FACE_COLLECTION_NAME))
    {
        return;
    }

    await _clientNet.CreateCollectionAsync(TEST_FACE_COLLECTION_NAME);
}
/// <summary>
/// Builds a sample record for the Microsoft connector: a fresh GUID as point id,
/// a 6-dimensional embedding, a small payload and two tags.
/// </summary>
private QdrantVectorRecord BuildTestVectorByMS()
{
    ReadOnlyMemory<float> embedding = new ReadOnlyMemory<float>(
        new float[] { 0.002545681f, 0f, 0.08641141f, 0.01007495f, 0.0194224f, 0.0037844249f });

    var metadata = new Dictionary<string, object>
    {
        ["insId"] = 112,
        ["kidId"] = 275,
        ["kidName"] = "刘XXX"
    };

    var tags = new List<string> { "insId-112", "kidId-275" };
    var pointId = Guid.NewGuid().ToString();

    return new QdrantVectorRecord(pointId, embedding, metadata, tags);
}
/// <summary>
/// Searches the test collection for the vectors nearest to <paramref name="target"/>
/// using the Microsoft connector and collects the streamed results into a list.
/// </summary>
/// <param name="target">Query vector.</param>
/// <param name="requiredTags">Optional tags forwarded to the driver; <c>null</c> means no tag filter is passed.</param>
/// <returns>The matched records paired with the second tuple element returned by the driver (presumably the similarity score — confirm against the driver).</returns>
private async Task<List<Tuple<QdrantVectorRecord, double>>> SearchNearestAsyncByMS(ReadOnlyMemory<float> target, IEnumerable<string> requiredTags = null)
{
    var matches = new List<Tuple<QdrantVectorRecord, double>>();

    // Positional arguments 0.7 / 10 / false are presumably score threshold, result limit
    // and "include vectors" — confirm against the driver signature.
    await foreach (var match in _clientNet.FindNearestInCollectionAsync(TEST_FACE_COLLECTION_NAME, target, 0.7, 10, false, requiredTags))
    {
        matches.Add(match.ToTuple());
    }

    return matches;
}
b) Qdrant 官方给的dotnet驱动:
需要注意的是:Qdrant同时提供 REST API 和 gRPC API, 工作在不同端口,默认6333,6334
GitHub - qdrant/qdrant-dotnet: Qdrant .Net SDK
官方的这个dotnet驱动内部使用 gRPC API, 调试时注意打开6334端口
注:目前看,官方的驱动基本功能都实现了,特别是基于payload的过滤在实际业务中特别有用
/// <summary>
/// End-to-end smoke test of the official Qdrant .NET client: ensures the collection
/// exists, upserts one point, searches for it with and without a payload filter,
/// and deletes the point again.
/// </summary>
/// <returns>
/// <c>true</c> when every step succeeds; <c>false</c> when any step throws
/// (the exception is written to standard error instead of being silently dropped).
/// </returns>
public async Task<bool> TestQdrantByQd()
{
    try
    {
        await TestCreateCollectionAsyncByQd();
        var point = BuildPointByQd();

        // Create the vector.
        await _qdrantClient.UpsertAsync(TEST_FACE_COLLECTION_NAME, new List<PointStruct> { point });

        try
        {
            // Query the nearest vectors using the same values the test point was built with.
            var floatArray = new float[] { 0.048494156f, 0.073583744f, 0f, 0.13068849f, 0.003689188f, 0.093264f };
            var floatArrayMemory = new ReadOnlyMemory<float>(floatArray);
            var resultList = await SearchNearestAsyncByQd(floatArrayMemory);
            var nearest = resultList.FirstOrDefault(x => x.Score > 0.9999);

            // Query the nearest vectors again, restricted by a payload filter.
            Filter filter = new();
            filter.Must.Add(Conditions.Match("insId", 121));
            var resultListTags = await SearchNearestAsyncByQd(floatArrayMemory, filter);
        }
        finally
        {
            // TODO: deletion still needs to be verified.
            // Runs even when a search throws so the test point is not left behind.
            if (point.Id.HasNum)
            {
                await _qdrantClient.DeleteAsync(TEST_FACE_COLLECTION_NAME, point.Id.Num);
            }
            else
            {
                await _qdrantClient.DeleteAsync(TEST_FACE_COLLECTION_NAME, new Guid(point.Id.Uuid));
            }
        }

        return true;
    }
    catch (Exception err)
    {
        // Surface the failure; the boolean result alone gives no hint about the cause.
        Console.Error.WriteLine($"TestQdrantByQd failed: {err}");
        return false;
    }
}
/// <summary>
/// Creates the test collection (vector size 6) through the official client
/// unless a collection with that name is already listed.
/// </summary>
private async Task TestCreateCollectionAsyncByQd()
{
    // Does the collection already exist?
    var existingCollections = await _qdrantClient.ListCollectionsAsync();
    var alreadyExists = existingCollections.Any(x => x.Equals(TEST_FACE_COLLECTION_NAME));
    if (alreadyExists)
    {
        return;
    }

    // NOTE(review): no distance metric is set here — confirm the server accepts
    // the client's default before relying on this code path.
    var vectorParams = new VectorParams() { Size = 6 };
    await _qdrantClient.CreateCollectionAsync(TEST_FACE_COLLECTION_NAME, vectorParams);
}
/// <summary>
/// Builds a sample point for the official client: a fresh GUID as id,
/// a 6-dimensional vector and a three-entry payload.
/// </summary>
private static PointStruct BuildPointByQd()
{
    var pointId = new PointId(Guid.NewGuid());
    var values = new float[] { 0.048494156f, 0.073583744f, 0f, 0.13068849f, 0.003689188f, 0.093264f };

    return new PointStruct
    {
        Id = pointId,
        Vectors = values,
        Payload =
        {
            ["insId"] = 121,
            ["kidId"] = 275,
            ["kidName"] = "刘XXX"
        },
    };
}
/// <summary>
/// Searches the test collection with the official client for up to 10 points
/// scoring at least 0.5 against the given vector, optionally restricted by a filter.
/// </summary>
/// <param name="floatArrayMemory">Query vector.</param>
/// <param name="filter">Optional filter passed through to the client; <c>null</c> passes none.</param>
/// <returns>The scored points returned by the client, copied into a new list.</returns>
private async Task<IEnumerable<ScoredPoint>>
    SearchNearestAsyncByQd(ReadOnlyMemory<float> floatArrayMemory, Filter filter = null)
{
    // Run the vector query.
    var hits = await _qdrantClient.SearchAsync(
        TEST_FACE_COLLECTION_NAME,
        floatArrayMemory,
        filter: filter,
        scoreThreshold: 0.5f,
        limit: 10);

    return hits.ToList();
}
c) 使用 HttpClient 直接访问 REST API
以下是Cursor给的一组代码供参考:
using System;
using System.Net.Http;
using System.Threading.Tasks;
/// <summary>
/// Minimal Qdrant client that talks to the REST API directly through <see cref="HttpClient"/>.
/// Request bodies are serialized as JSON, so number formatting does not depend on the current culture.
/// </summary>
public class QdrantClient
{
    private readonly HttpClient httpClient;
    private readonly string baseUrl;

    /// <summary>Creates a client for the Qdrant REST endpoint at <paramref name="baseUrl"/>.</summary>
    /// <param name="baseUrl">Base address such as <c>http://localhost:6333</c>; a trailing slash is ignored.</param>
    /// <exception cref="ArgumentException"><paramref name="baseUrl"/> is null, empty or whitespace.</exception>
    public QdrantClient(string baseUrl)
    {
        if (string.IsNullOrWhiteSpace(baseUrl))
        {
            throw new ArgumentException("Base URL must not be empty.", nameof(baseUrl));
        }

        // Normalize so that "http://host:6333/" does not produce "//collections/...".
        this.baseUrl = baseUrl.TrimEnd('/');
        this.httpClient = new HttpClient();
    }

    /// <summary>Creates a collection via <c>PUT /collections/{name}</c>.</summary>
    /// <param name="collectionName">Name of the collection to create.</param>
    /// <param name="vectorSize">Dimensionality of the vectors the collection will hold.</param>
    /// <param name="distance">Distance metric name sent to Qdrant, e.g. "Cosine", "Dot" or "Euclid".</param>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    public async Task CreateCollection(string collectionName, int vectorSize = 3, string distance = "Cosine")
    {
        // Qdrant requires the vector configuration in the request body; an empty PUT is rejected.
        var body = new { vectors = new { size = vectorSize, distance } };

        using var content = ToJsonContent(body);
        using var response = await httpClient.PutAsync(CollectionUrl(collectionName), content);
        response.EnsureSuccessStatusCode();
    }

    /// <summary>Upserts a single point via <c>PUT /collections/{name}/points</c> and waits for it to be applied.</summary>
    /// <param name="collectionName">Target collection.</param>
    /// <param name="objectId">
    /// Point id. Qdrant accepts unsigned integers and UUIDs only: a purely numeric string is sent
    /// as a JSON number, anything else is sent as a string and must therefore be a UUID.
    /// </param>
    /// <param name="vector">Vector to store; its length must match the collection's vector size.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objectId"/> or <paramref name="vector"/> is null.</exception>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    public async Task InsertObject(string collectionName, string objectId, float[] vector)
    {
        if (objectId == null)
        {
            throw new ArgumentNullException(nameof(objectId));
        }

        if (vector == null)
        {
            throw new ArgumentNullException(nameof(vector));
        }

        // wait=true makes the call return only after the point is searchable.
        var url = $"{CollectionUrl(collectionName)}/points?wait=true";
        var body = new { points = new[] { new { id = ToPointId(objectId), vector } } };

        using var content = ToJsonContent(body);
        using var response = await httpClient.PutAsync(url, content);
        response.EnsureSuccessStatusCode();
    }

    /// <summary>Runs a similarity search via <c>POST /collections/{name}/points/search</c>.</summary>
    /// <param name="collectionName">Collection to search.</param>
    /// <param name="queryVector">Query vector.</param>
    /// <param name="topK">Maximum number of results to return.</param>
    /// <returns>The successful HTTP response; the caller owns it and should dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="queryVector"/> is null.</exception>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    public async Task<HttpResponseMessage> Search(string collectionName, float[] queryVector, int topK = 10)
    {
        if (queryVector == null)
        {
            throw new ArgumentNullException(nameof(queryVector));
        }

        var url = $"{CollectionUrl(collectionName)}/points/search";
        var body = new { vector = queryVector, limit = topK };

        using var content = ToJsonContent(body);
        var response = await httpClient.PostAsync(url, content);
        response.EnsureSuccessStatusCode();
        return response;
    }

    // Builds the URL of a collection resource, escaping the name so it is safe inside a path segment.
    private string CollectionUrl(string collectionName)
    {
        return $"{baseUrl}/collections/{Uri.EscapeDataString(collectionName)}";
    }

    // Maps a caller-supplied id to what Qdrant expects: a JSON number for numeric ids, a string (UUID) otherwise.
    private static object ToPointId(string objectId)
    {
        if (ulong.TryParse(
                objectId,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out var numericId))
        {
            return numericId;
        }

        return objectId;
    }

    // Serializes a request body to UTF-8 JSON content.
    private static StringContent ToJsonContent(object body)
    {
        var json = System.Text.Json.JsonSerializer.Serialize(body);
        return new StringContent(json, System.Text.Encoding.UTF8, "application/json");
    }
}
/// <summary>
/// Console sample: creates a collection, stores one vector and prints the raw search response.
/// </summary>
public class Program
{
    /// <summary>Entry point; expects a Qdrant instance listening on <c>http://localhost:6333</c>.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        var qdrantBaseUrl = "http://localhost:6333"; // Replace with your Qdrant API base URL
        var qdrantClient = new QdrantClient(qdrantBaseUrl);

        // Create a collection
        await qdrantClient.CreateCollection("my_collection");

        // Insert an object with a vector.
        // Qdrant point ids must be an unsigned integer or a UUID, so an arbitrary name is not accepted.
        var objectId = "1";
        var vector = new float[] { 1.0f, 2.0f, 3.0f };
        await qdrantClient.InsertObject("my_collection", objectId, vector);

        // Search for similar objects
        var queryVector = new float[] { 1.0f, 1.0f, 1.0f };
        using var response = await qdrantClient.Search("my_collection", queryVector);
        var searchResult = await response.Content.ReadAsStringAsync();
        Console.WriteLine(searchResult);
    }
}