1.通过get api查询数据
get api的定义如下
def get(ids: Optional[OneOrMany[ID]] = None,
where: Optional[Where] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
where_document: Optional[WhereDocument] = None,
include: Include = ["metadatas", "documents"]) -> GetResult
Get embeddings and their associated data from the data store. If no ids or where filter is provided, returns all embeddings up to limit starting at offset.
Arguments:
ids - The ids of the embeddings to get. Optional.
where - A Where type dict used to filter results by. E.g. {"$and": [{"color": "red"}, {"price": 4.20}]}. Optional.
limit - The number of documents to return. Optional.
offset - The offset to start returning results from. Useful for paging results with limit. Optional.
where_document - A WhereDocument type dict used to filter by the documents. E.g. {"$contains" : "hello"}. Optional.
include - A list of what to include in the results. Can contain "embeddings", "metadatas", "documents". Ids are always included. Defaults to ["metadatas", "documents"]. Optional.
Returns:
GetResult - A GetResult object containing the results.
我们可以通过设置不同的参数，使用 get API 获取数据记录：
# Write two sample records (ids "d1" and "d2"), each with a document text
# and a "title" metadata entry, so the lookups below have data to return.
collection.upsert(
    ids=["d1", "d2"],
    documents=[
        "this is a document about pineapple",
        "this is a document about oranges",
    ],
    metadatas=[{"title": "pineapple v2"}, {"title": "oranges v2"}],
)

# Lookup 1: select a record by its id.
d1 = collection.get(
    ids=["d1"],
)
pprint.pprint(d1)

# Lookup 2: select by a `where` filter on the "title" metadata value.
d2 = collection.get(
    where={"title": "oranges v2"},
)
pprint.pprint(d2)

# Lookup 3: select by a `where_document` filter on the document text.
# NOTE: this deliberately reuses the name d1, as in the original example.
d1 = collection.get(
    where_document={"$contains": "pineapple"},
)
pprint.pprint(d1)
# {'data': None,
# 'documents': ['this is a document about pineapple'],
# 'embeddings': None,
# 'ids': ['d1'],
# 'included': ['metadatas', 'documents'],
# 'metadatas': [{'title': 'pineapple v2'}],
# 'uris': None}
# {'data': None,
# 'documents': ['this is a document about oranges'],
# 'embeddings': None,
# 'ids': ['d2'],
# 'included': ['metadatas', 'documents'],
# 'metadatas': [{'title': 'oranges v2'}],
# 'uris': None}
# {'data': None,
# 'documents': ['this is a document about pineapple'],
# 'embeddings': None,
# 'ids': ['d1'],
# 'included': ['metadatas', 'documents'],
# 'metadatas': [{'title': 'pineapple v2'}],
# 'uris': None}
2.通过query api查询数据
query的api定义如下
def query(
query_embeddings: OneOrMany[Embedding] | OneOrMany[PyEmbedding] | None = None,
query_texts: OneOrMany[Document] | None = None,
query_images: OneOrMany[Image] | None = None,
query_uris: OneOrMany[URI] | None = None,
ids: OneOrMany[ID] | None = None,
n_results: int = 10,
where: Where | None = None,
where_document: WhereDocument | None = None,
include: Include = ["metadatas", "documents", "distances"]
) -> QueryResult
Get the n_results nearest neighbor embeddings for provided query_embeddings or query_texts.
Args
query_embeddings
The embeddings to get the closest neighbors of. Optional.
query_texts
The document texts to get the closest neighbors of. Optional.
query_images
The images to get the closest neighbors of. Optional.
query_uris
The URIs to be used with data loader. Optional.
ids
A subset of ids to search within. Optional.
n_results
The number of neighbors to return for each query_embedding or query_texts. Optional.
where
A Where type dict used to filter results by. E.g. {"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}. Optional.
where_document
A WhereDocument type dict used to filter by the documents. E.g. {"$contains": "hello"}. Optional.
include
A list of what to include in the results. Can contain "embeddings", "metadatas", "documents", "distances". Ids are always included. Defaults to ["metadatas", "documents", "distances"]. Optional.
Returns
QueryResult
A QueryResult object containing the results.
我们可以通过 query API 设置不同的条件来查询数据：
# Write two sample records (ids "d1" and "d2"), each with a document text
# and a "title" metadata entry, so the queries below have data to search.
collection.upsert(
    ids=["d1", "d2"],
    documents=[
        "this is a document about pineapple",
        "this is a document about oranges",
    ],
    metadatas=[{"title": "pineapple v2"}, {"title": "oranges v2"}],
)

# Query 1: nearest neighbours of the text "pineapple", with no extra filter.
results = collection.query(query_texts=["pineapple"])
pprint.pprint(results)

# Query 2: same query text, narrowed by a `where` filter on the "title"
# metadata value.
d1 = collection.query(
    where={"title": "pineapple v2"},
    query_texts=["pineapple"],
)
pprint.pprint(d1)

# Query 3: same query text, narrowed by a `where_document` filter on the
# document text.
d2 = collection.query(
    where_document={"$contains": "oranges"},
    query_texts=["pineapple"],
)
pprint.pprint(d2)

# Query 4: compute the embedding for "pineapple" explicitly and search with
# `query_embeddings` instead of `query_texts`.
default_ef = embedding_functions.DefaultEmbeddingFunction()
embedding = default_ef(["pineapple"])
results = collection.query(query_embeddings=embedding)
pprint.pprint(results)
# {'data': None,
# 'distances': [[0.586125373840332, 1.494820237159729]],
# 'documents': [['this is a document about pineapple',
# 'this is a document about oranges']],
# 'embeddings': None,
# 'ids': [['d1', 'd2']],
# 'included': ['metadatas', 'documents', 'distances'],
# 'metadatas': [[{'title': 'pineapple v2'}, {'title': 'oranges v2'}]],
# 'uris': None}
# {'data': None,
# 'distances': [[0.586125373840332]],
# 'documents': [['this is a document about pineapple']],
# 'embeddings': None,
# 'ids': [['d1']],
# 'included': ['metadatas', 'documents', 'distances'],
# 'metadatas': [[{'title': 'pineapple v2'}]],
# 'uris': None}
# {'data': None,
# 'distances': [[1.494820237159729]],
# 'documents': [['this is a document about oranges']],
# 'embeddings': None,
# 'ids': [['d2']],
# 'included': ['metadatas', 'documents', 'distances'],
# 'metadatas': [[{'title': 'oranges v2'}]],
# 'uris': None}
# {'data': None,
# 'distances': [[0.586125373840332, 1.494820237159729]],
# 'documents': [['this is a document about pineapple',
# 'this is a document about oranges']],
# 'embeddings': None,
# 'ids': [['d1', 'd2']],
# 'included': ['metadatas', 'documents', 'distances'],
# 'metadatas': [[{'title': 'pineapple v2'}, {'title': 'oranges v2'}]],
# 'uris': None}