准备测试数据:
为了方便观察,将向量维度 dim 设置为 3。
import uuid
import numpy as np
from pymilvus import (
connections,
FieldSchema, CollectionSchema, DataType,
Collection,
)
# Connection settings for the Milvus server used by this demo.
collection_name = "hello_milvus"
host = "192.168.0.109"
port = 19530
# Credentials are left empty here; fill them in if the server requires them.
username = ""
password = ""
# Number of rows to insert and the vector dimension. dim is kept at 3 so the
# stored vectors are easy to read when printed, and so that it matches the
# 3-dimensional query vector built by the search script.
num_entities, dim = 3000, 3
def generate_uuids(number_of_uuids: int) -> list:
    """Return ``number_of_uuids`` random UUID4 values as strings.

    Args:
        number_of_uuids: How many UUID strings to generate.

    Returns:
        A list with one ``str(uuid.uuid4())`` value per requested entry.
    """
    return [str(uuid.uuid4()) for _ in range(number_of_uuids)]
print("start connecting to Milvus")
connections.connect("default", host=host, port=port, user=username, password=password)

# Schema: a caller-supplied integer primary key (auto_id=False), a double,
# a short string, and the float vector field that is indexed below.
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="random", dtype=DataType.DOUBLE),
    FieldSchema(name="comment", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")

# Log the name that is actually used, taken from collection_name rather than
# a hard-coded label.
print(f"Create collection `{collection_name}`")
coll = Collection(collection_name, schema, consistency_level="Bounded", shards_num=1)

print("Start inserting entities")
# Fixed seed so that repeated runs produce the same random values.
rng = np.random.default_rng(seed=19530)
# Column-oriented payload: one entry per schema field, in schema order.
entities = [
    list(range(num_entities)),          # pk
    rng.random(num_entities).tolist(),  # random
    generate_uuids(num_entities),       # comment
    rng.random((num_entities, dim)),    # embeddings
]
insert_result = coll.insert(entities)

print("Start flush")
coll.flush()

print("Start creating index")
index_params = {
    "index_type": "HNSW",
    "metric_type": "COSINE",
    "params": {
        "M": 16,
        "efConstruction": 40,
    },
}
coll.create_index(
    field_name="embeddings",
    index_params=index_params,
    index_name="idx_em",
)
# Load the collection after the index has been created.
coll.load()
print("done")
执行向量搜索:
import random
from pymilvus import (
connections,
Collection,
)
# Dimension of the query vector; it has to equal the dim of the "embeddings"
# field of the collection being searched.
dim = 3


def main():
    """Connect to Milvus, run one vector search on ``hello_milvus``, print it."""
    # Number of hits requested from the search.
    limit = 5

    connections.connect(
        alias="default",
        user='',
        password='',
        host='192.168.0.109',
        port='19530',
    )
    coll = Collection("hello_milvus")

    search_param = {
        "metric_type": "COSINE",
        # ef must not be smaller than limit; max() keeps that true if limit
        # is ever raised above 40. With limit = 5 this is still ef = 40.
        "params": {"ef": max(40, limit)},
    }
    # One random query vector with `dim` components.
    search_data = [random.random() for _ in range(dim)]

    # The optional `expr` and `consistency_level` arguments are not passed,
    # so the search uses their defaults.
    results = coll.search(
        data=[search_data],
        anns_field="embeddings",
        param=search_param,
        limit=limit,
        output_fields=['pk', 'embeddings'],
    )
    print(results)


if __name__ == '__main__':
    main()
注意:搜索参数 ef 的取值必须大于或等于 limit(返回结果的数量)。