.NET 使用 Elastic.Clients.Elasticsearch 在 Elasticsearch 8 中实现向量存储和相似度检索

一、测试环境

Elastic.Clients.Elasticsearch版本:8.13.0
Elasticsearch版本:8.13.0

二、代码

1、创建包含DenseVector的索引
/// <summary>
/// Ensures the face-vector index exists, creating it (with a dense_vector
/// field) and seeding one test document when it does not.
/// </summary>
/// <returns>
/// <c>true</c> when the index already exists or was created successfully;
/// <c>false</c> when the create-index request was not acknowledged.
/// </returns>
public static bool InitIndex()
{
    // Field mappings. The keys are the JSON field names as stored in
    // Elasticsearch, so they use the same camelCase form that SearchKnn
    // queries ("fileName", "embedding"). With PascalCase keys the explicit
    // dense_vector mapping would sit on a different field than the one the
    // documents populate and the kNN query targets.
    // NOTE(review): assumes the client serializes ES_FaceVector with the
    // default camelCase field naming - confirm against the client settings.
    var faceVectorProperties = new Properties
    {
        { "id", new KeywordProperty() },
        { "fileID", new KeywordProperty() },
        { "fileGUID", new KeywordProperty() },
        { "resourceID", new KeywordProperty() },
        { "fileName", new TextProperty() },
        { "embedding", new DenseVectorProperty { Dims = 3 } }
    };

    // Use the same index-name constant as AddDoc/SearchKnn instead of a
    // separate "FaceVector" literal, so the index that is checked/created is
    // the one that is written to and searched. Elasticsearch also rejects
    // index names containing uppercase characters.
    // NOTE(review): presumably ElasticIndexEnum.FaceVector is a lowercase
    // string constant - verify.
    var existsResponse = _client.Indices.ExistsAsync(ElasticIndexEnum.FaceVector).Result;

    // ExistsResponse.Exists reports whether the index is present;
    // IsValidResponse only reports whether the call itself was valid.
    if (existsResponse.Exists)
    {
        return true;
    }

    // Create-index request: one primary shard, one replica, explicit mappings.
    var createIndexRequest = new CreateIndexRequest(ElasticIndexEnum.FaceVector)
    {
        Settings = new IndexSettings
        {
            NumberOfShards = 1,
            NumberOfReplicas = 1
        },
        Mappings = new TypeMapping
        {
            Properties = faceVectorProperties
        }
    };

    var createResponse = _client.Indices.CreateAsync(createIndexRequest).Result;
    if (!createResponse.Acknowledged)
    {
        return false;
    }

    // Seed a single test document. The embedding length matches Dims above.
    var seedDocument = new ES_FaceVector
    {
        FileID = 0,
        FileGUID = Guid.NewGuid(),
        ResourceID = 0,
        FileName = "测试",
        Embedding = new float[] { 1.2f, 1.1f, 1.3f }
    };

    // As before, a failure to index the seed document does not change the
    // return value: the index itself has been created at this point.
    AddDoc<ES_FaceVector>(seedDocument, ElasticIndexEnum.FaceVector);

    return true;
}

2、索引文档
/// <summary>
/// Indexes a batch of documents into the given index with a single bulk request.
/// </summary>
/// <typeparam name="T">Document type.</typeparam>
/// <param name="data">Documents to index.</param>
/// <param name="indexName">Name of the target index.</param>
/// <returns>The bulk response's <c>IsValidResponse</c> flag.</returns>
public static bool AddDocs<T>(List<T> data, string indexName) where T : class
{
    // Blocks on the async bulk call; the method itself is synchronous.
    var bulkTask = _client.BulkAsync(bulk => bulk.Index(indexName).IndexMany(data));
    var bulkResult = bulkTask.Result;
    return bulkResult.IsValidResponse;
}
/// <summary>
/// Indexes a single document into the given index.
/// </summary>
/// <typeparam name="T">Document type.</typeparam>
/// <param name="data">Document to index.</param>
/// <param name="indexName">Name of the target index.</param>
/// <returns>The index response's <c>IsValidResponse</c> flag.</returns>
public static bool AddDoc<T>(T data, string indexName) where T : class
    => _client.IndexAsync(data, indexName).Result.IsValidResponse;

3、对向量字段进行近似 kNN 检索
/// <summary>
/// Runs an approximate kNN search against the "embedding" field of the
/// face-vector index and iterates over the returned hits. On an invalid
/// response the debug information is written to the console.
/// </summary>
public static void SearchKnn()
{
    // Query vector. It has three components, matching the Dims = 3 declared
    // for the dense_vector field when the index is created in InitIndex.
    var doubleArr = new[] { -0.04604065, 0.054946236, 0.057453074 };

    // kNN clause: return the 2 nearest neighbours, considering up to 1000
    // candidates, on the "embedding" field.
    var knnQuery = new KnnQuery()
    {
        k = 2,
        NumCandidates = 1000,
        Field = "embedding",
        QueryVector = doubleArr.Select(s => (float)s).ToArray()
    };

    // Search request: apply the kNN clause, drop hits scoring below 0.90 and
    // only fetch the two source fields that are read below.
    var searchRequest = new SearchRequest<ES_FaceVector>(ElasticIndexEnum.FaceVector)
    {
        Knn = new KnnQuery[] { knnQuery },
        MinScore = 0.90,
        SourceIncludes = new[] { "fileName", "embedding" }
    };

    var searchResponse = _client.Search<ES_FaceVector>(searchRequest);
    if (!searchResponse.IsValidResponse)
    {
        Console.WriteLine($"Error: {searchResponse.DebugInformation}");
        return;
    }

    foreach (var hit in searchResponse.Hits)
    {
        // A hit can come back without a source document; skip it instead of
        // dereferencing null.
        if (hit.Source is null)
        {
            continue;
        }

        // Placeholder for per-document processing.
        var fileNameTemp = hit.Source.FileName;
        var embeddingTemp = hit.Source.Embedding;
    }
}

三、参考

.Net使用Elastic.Clients.Elasticsearch连接Elasticsearch8

https://www.elastic.co/guide/en/elasticsearch/client/net-api/8.13/connecting.html

https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html

https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html


  • 5
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
co.elastic.clients.elasticsearch.core.aggregations 是 Java 客户端 ElasticSearch 的一个聚合(Aggregation)方法,用于对数据进行分析和统计。 具体使用方法可以参考以下示例: ```java import co.elastic.clients.base.*; import co.elastic.clients.elasticsearch.*; import co.elastic.clients.elasticsearch.core.*; import co.elastic.clients.elasticsearch.core.aggregations.*; import co.elastic.clients.elasticsearch.core.aggregations.bucket.*; import co.elastic.clients.elasticsearch.core.aggregations.metrics.*; import java.io.IOException; import java.util.*; public class ElasticSearchAggregationExample { public static void main(String[] args) throws IOException, ApiException { RestClientBuilder restClientBuilder = RestClient.builder( new HttpHost("localhost", 9200, "http") ); ElasticSearch client = new ElasticSearch(restClientBuilder); SearchRequest request = new SearchRequest() .index("my_index") .source(new SearchSource() .query(new MatchAllQuery()) .aggregations(new TermsAggregation("my_terms_agg") .field("my_field") .size(10) .subAggregations(new AvgAggregation("my_avg_agg") .field("my_other_field") ) ) ); SearchResponse response = client.search(request); TermsAggregationResult myTermsAggResult = response.aggregations().terms("my_terms_agg"); for (TermsAggregationEntry entry : myTermsAggResult.buckets()) { String term = entry.keyAsString(); long count = entry.docCount(); AvgAggregationResult myAvgAggResult = entry.aggregations().avg("my_avg_agg"); double avg = myAvgAggResult.value(); System.out.println(term + ": " + count + ", avg: " + avg); } client.close(); } } ``` 这个例子展示了如何使用 co.elastic.clients.elasticsearch.core.aggregations 方法来进行聚合查询。在这个例子,我们使用了 TermsAggregation 和 AvgAggregation 两个聚合方法,对数据进行了分组和统计。具体步骤为: 1. 创建一个 SearchRequest 对象,并设置索引名称和查询条件。 2. 在查询条件添加聚合条件。这里使用了 TermsAggregation 来对数据进行分组,然后使用 AvgAggregation 来统计每个分组的平均值。 3. 执行查询,并获取查询结果。 4. 使用聚合结果对象的方法来获取聚合结果,然后对结果进行处理。 需要注意的是,聚合方法的具体参数和用法可以参考 ElasticSearch 官方文档。同时,Java 客户端的版本和 ElasticSearch 的版本也需要匹配,否则可能会出现兼容性问题。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值