一 最近项目上将ELK7升级到8, 除了ELK升级本身的一些变化,程序调用 es api 也有较大变化。
1) es7 的 dotnet 的驱动有两个:
Introduction | Elasticsearch .NET Clients [7.17] | Elastic
1. Elasticsearch.Net: 相对底层的客户端驱动,比较灵活。
2. NEST:相对高级的客户端,包装了常用操作的类型,定义了一个强类型的查询DSL, 其内部仍然是在调用Elasticsearch.Net, 并允许用户在必要时回退到底层客户端。
2)es8使用新的客户端:Elastic.Clients.Elasticsearch
Introduction | Elasticsearch .NET Client [8.9] | Elastic
es8的客户端提供了强类型api, 定义了新的查询DSL, 与 NEST 差异比较大。
项目中虽然只是使用了基本的读写日志等操作,但升级过程中对查询api的修改很多,近乎重写。
二 以下,记录一个典型查询的写法, 以及简单的聚合函数(取范围内某个字段的value列表)
using Elastic.Clients.Elasticsearch;
using Elastic.Clients.Elasticsearch.QueryDsl;
using SortOrder = Elastic.Clients.Elasticsearch.SortOrder;
using Elastic.Clients.Elasticsearch.Aggregations;
/// <summary>
/// Reads operation logs from Elasticsearch: builds the filter query and the sort
/// from <paramref name="condition"/>, runs the search and materializes the hits.
/// </summary>
/// <param name="condition">Filter, sort and paging options supplied by the caller.</param>
/// <returns>The documents returned by the search.</returns>
public async Task<OperationLogListOutput> ReadElkAsync(OperationLogCondition condition)
{
    var elkClient = _elkSearchClient.ElkClient();

    // Build the query from the filter conditions.
    var queryDesc = BuildQueryDescriptor(condition);

    // Sort: either by the column header the caller requested or by creation time.
    var sortDesc = BuildSort(condition);

    // Execute the search.
    var resp = await SearchAsync(condition, elkClient, queryDesc, sortDesc);
    var docList = resp.Documents.ToList();

    // NOTE(review): docList is a List<OperationLogOutput> while the declared result type is
    // OperationLogListOutput; the mapping between the two is not shown in this snippet -- confirm
    // against the real project before compiling.
    return docList;
}
/// <summary>
/// Builds the query for the operation-log search. Every clause is placed in the
/// <c>filter</c> section of a single <c>bool</c> query.
/// </summary>
/// <param name="condition">Institution id, optional date range and optional search text.</param>
/// <returns>A delegate that configures the query descriptor.</returns>
private Action<QueryDescriptor<OperationLogOutput>> BuildQueryDescriptor(OperationLogCondition condition)
{
    var filters = new List<Action<QueryDescriptor<OperationLogOutput>>>();

    // Only logs written by the MyServiceXXX service.
    filters.Add(q => q.MatchPhrase(mp => mp.Field("Service").Query("MyServiceXXX")));

    // Only logs that belong to the requested institution.
    filters.Add(q => q.Term(t => t.Field(doc => doc.InstitutionId).Value(condition.InstitutionId)));

    // The log must contain the field "StartContent" or the field "EndContent".
    filters.Add(q => q.Bool(b => b.Should(
        s => s.Exists(e => e.Field(doc => doc.StartContent)),
        s => s.Exists(e => e.Field(doc => doc.EndContent)))));

    // Lower bound of the time range. The value is used as-is: per the original author it is
    // already UTC, like the timestamps stored in ELK; otherwise a time-zone conversion is needed.
    if (condition.StartDate is { } startDate)
    {
        filters.Add(q => q.Range(r => r.DateRange(
            d => d.Field(doc => doc.Timestamp).Gte(DateMath.Anchored(startDate)))));
    }

    // Upper bound of the time range.
    if (condition.EndDate is { } endDate)
    {
        filters.Add(q => q.Range(r => r.DateRange(
            d => d.Field(doc => doc.Timestamp).Lte(DateMath.Anchored(endDate)))));
    }

    // Keyword search: "contains" match on any of three keyword fields.
    if (!string.IsNullOrWhiteSpace(condition.SearchText))
    {
        var pattern = $"*{condition.SearchText}*";

        Action<QueryDescriptor<OperationLogOutput>> WildcardOn(string field) =>
            q => q.Wildcard(new WildcardQuery(field) { Value = pattern });

        var anyFieldMatches = new[]
        {
            WildcardOn("OperatorEmail.keyword"),
            WildcardOn("StartContent.keyword"),
            WildcardOn("EndContent.keyword")
        };

        filters.Add(q => q.Bool(b => b.Should(anyFieldMatches)));
    }

    var allFilters = filters.ToArray();
    return q => q.Bool(b => b.Filter(allFilters));
}
/// <summary>
/// Builds the sort options for the operation-log search.
/// </summary>
/// <remarks>
/// <c>condition.Sort</c> is a JSON string deserialized into <c>OperationLogGridOrderField</c>
/// using camel-case property names. When it is missing, blank, deserializes to <c>null</c>, or
/// lacks a property or direction (the front end sends "{}" when no sort is selected), the result
/// falls back to sorting by <c>Timestamp</c> descending.
/// </remarks>
/// <param name="condition">Search condition carrying the optional sort JSON.</param>
/// <returns>The sort descriptor to pass to the search request.</returns>
private SortOptionsDescriptor<OperationLogOutput> BuildSort(OperationLogCondition condition)
{
    // JsonConvert.DeserializeObject throws on a null input and returns null for the JSON
    // literal "null", so guard both cases instead of failing the whole search.
    var gridOrderField = string.IsNullOrWhiteSpace(condition.Sort)
        ? null
        : JsonConvert.DeserializeObject<OperationLogGridOrderField>(condition.Sort, new JsonSerializerSettings
        {
            ContractResolver = new DefaultContractResolver
            {
                NamingStrategy = new CamelCaseNamingStrategy()
            }
        });

    var hasExplicitSort = gridOrderField != null
        && !string.IsNullOrWhiteSpace(gridOrderField.Value)
        && !string.IsNullOrWhiteSpace(gridOrderField.Property);

    if (!hasExplicitSort)
    {
        // Default: newest entries first.
        var defaultSort = new SortOptionsDescriptor<OperationLogOutput>();
        defaultSort.Field(x => x.Timestamp, GetFieldSort(SortOrder.Desc));
        return defaultSort;
    }

    // Anything other than "asc" (compared case-insensitively, culture-independent) sorts descending.
    var order = string.Equals(gridOrderField.Value, "asc", StringComparison.OrdinalIgnoreCase)
        ? SortOrder.Asc
        : SortOrder.Desc;

    return BuildSortExpression(gridOrderField.Property, GetFieldSort(order));
}
// 典型的查询实例
/// <summary>
/// Runs the search against the default ELK index with the given query and sort.
/// </summary>
/// <remarks>
/// Normal requests fetch the page starting at <c>Page * Limit</c> with <c>Limit</c> hits.
/// Export requests start at offset 0 and fetch <c>(Page + 1) * Limit</c> hits.
/// </remarks>
/// <param name="condition">Supplies <c>IsExport</c>, <c>Page</c> and <c>Limit</c>.</param>
/// <param name="elkClient">Client used to send the request.</param>
/// <param name="queryDesc">Query configuration delegate.</param>
/// <param name="sortDesc">Sort options.</param>
/// <returns>The raw search response.</returns>
private async Task<SearchResponse<OperationLogOutput>> SearchAsync(OperationLogCondition condition, ElasticsearchClient elkClient, Action<QueryDescriptor<OperationLogOutput>> queryDesc, SortOptionsDescriptor<OperationLogOutput> sortDesc)
{
    var offset = !condition.IsExport ? condition.Page * condition.Limit : 0;
    var pageSize = !condition.IsExport ? condition.Limit : (condition.Page + 1) * condition.Limit;

    return await elkClient.SearchAsync<OperationLogOutput>(request => request
        .Index(_elkSearchClient.ELK_DEFAULT_INDEX)
        .Query(queryDesc)
        .Sort(sortDesc)
        .From(offset)
        .Size(pageSize));
}
// 使用聚合函数的实例
// 获取满足条件的日志中的 “BizOperation” 字段的列表,
// 相当于SQL: SELECT BizOperation, count(1) count FROM log GROUP BY BizOperation order by count(1) desc
/// <summary>
/// Returns the distinct values of the <c>Operation.keyword</c> field among the logs matching
/// <paramref name="queryDesc"/>, using a terms aggregation.
/// </summary>
/// <param name="elkClient">Client used to send the request.</param>
/// <param name="queryDesc">Query that selects the logs to aggregate over.</param>
/// <returns>The bucket keys of the aggregation; an empty list when there is no result.</returns>
private async Task<List<string>> SearchElkUniqueBizOperationsAsync(ElasticsearchClient elkClient, Action<QueryDescriptor<OperationLogOutput>> queryDesc)
{
    var aggName = "BizOperation";
    var ret = await elkClient.SearchAsync<OperationLogOutput>(i => i.Index(_elkSearchClient.ELK_DEFAULT_INDEX)
        .Query(queryDesc)
        // No matching events are needed, only the aggregation data.
        .Size(0)
        // Only the top 10 buckets are returned by default; ask for 500.
        // The original author notes the size must not exceed search.max_buckets (65536).
        .Aggregations(agg => agg.Terms(aggName, descriptor => descriptor.Field("Operation.keyword").Size(500))));

    // Look the aggregation up by the name it was registered under instead of relying on it
    // being the first entry in the response.
    if (ret?.Aggregations != null
        && ret.Aggregations.TryGetValue(aggName, out var aggregate)
        && aggregate is StringTermsAggregate terms)
    {
        // OfType drops null keys and any key that is not a string, so the list never holds nulls.
        return terms.Buckets
            .Select(bucket => bucket.Key.Value)
            .OfType<string>()
            .ToList();
    }

    // Nothing matched: return an empty list rather than null.
    return new List<string>();
}
上边的两个查询生成的 ES API 调用如下(可在kibana dev tools里试跑验证):
GET my-index-name/_search
{
"query": {
"bool": {
"filter": [
{
"match_phrase": {
"Service": {
"query": "BusinessLog"
}
}
},
{
"term": {
"InstitutionId": {
"value": 4203
}
}
},
{
"bool": {
"should": [
{
"exists": {
"field": "StartContent"
}
},
{
"exists": {
"field": "EndContent"
}
}
]
}
},
{
"range": {
"@timestamp": {
"gte": "2023-11-20T15:00:00Z"
}
}
},
{
"range": {
"@timestamp": {
"lte": "2023-11-21T14:59:59.999Z"
}
}
}
]
}
},
"size": 0,
"aggs": {
"BizOperation": {
"terms": {
"field": "Operation.keyword",
"size": 500
}
}
}
}
上边写聚合函数时参考的资料:
Terms aggregation | Elasticsearch Guide [8.11] | Elastic
c# - AggregationContainer vs. AggregationDescriptor - Stack Overflow