项目场景:
Elasticsearch分组后,根据开始时间和结束事件计算出两字段时间差排序,通过DSL和java API两种方式
解决方案:
示例:在单据表中,查询2022-01-19至2022-01-20每个人提交的单据数量,及每个人单据审批时间,并根据审批时长从高到低排序。
1、DSL方式 :
GET /userbill/_search
{
"from":0,
"size":0,
"query":{
"bool":{
"filter":[
{
"bool":{
"must":[
{
"term":{
"type":{
"value":"bill",
"boost":1
}
}
},
{
"range":{
"createTime":{
"from":"2022-01-09",
"to":"2022-01-10",
"include_lower":true,
"include_upper":true,
"boost":1
}
}
}
],
"adjust_pure_negative":true,
"boost":1
}
}
],
"adjust_pure_negative":true,
"boost":1
}
},
"_source":false,
"aggregations":{
"group_name":{
"terms":{
"field":"userId",
"size":999999,
"min_doc_count":1,
"shard_min_doc_count":0,
"show_term_doc_count_error":false,
"order":[
{
"avg_grade":"desc"
},
{
"_key":"asc"
}
]
},
"aggregations":{
"avg_grade":{
"avg":{
"script":{
"source":"(doc['completeTime'].value.toInstant().toEpochMilli() - doc['createTime'].value.toInstant().toEpochMilli())",
"lang":"painless"
}
}
},
"bucket_sort":{
"bucket_sort":{
"sort":[
],
"from":0,
"size":10,
"gap_policy":"SKIP"
}
}
}
}
}
}
查询结果
key为分组用户ID,doc_count为数量,avg_grade内value为审批平均时长
{
"took" : 64,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 333,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"group_name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "961292",
"doc_count" : 2,
"avg_grade" : {
"value" : 6.80483E8
}
},
{
"key" : "88896565",
"doc_count" : 4,
"avg_grade" : {
"value" : 4.64815E7
}
},
{
"key" : "918555",
"doc_count" : 15,
"avg_grade" : {
"value" : 3662400.0
}
},
{
"key" : "928024",
"doc_count" : 15,
"avg_grade" : {
"value" : 596000.0
}
},
{
"key" : "88895739",
"doc_count" : 27,
"avg_grade" : {
"value" : 501814.81481481483
}
},
{
"key" : "924208",
"doc_count" : 21,
"avg_grade" : {
"value" : 240809.52380952382
}
},
{
"key" : "",
"doc_count" : 9,
"avg_grade" : {
"value" : 99777.77777777778
}
},
{
"key" : "366474",
"doc_count" : 15,
"avg_grade" : {
"value" : 69933.33333333333
}
},
{
"key" : "951478",
"doc_count" : 2,
"avg_grade" : {
"value" : 55500.0
}
},
{
"key" : "1002453",
"doc_count" : 20,
"avg_grade" : {
"value" : 41150.0
}
}
]
}
}
}
2、java API方式
查询后解析结果,封装到List集合中
public List<StatisticsData> test(){
SearchSourceBuilder builder = new SearchSourceBuilder();
BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
boolQueryBuilder.must(QueryBuilders.termQuery("type", "bill"));
boolQueryBuilder.must(QueryBuilders.rangeQuery("createTime").gte("2022-01-19").lte("2022-01-20"));
queryBuilder.filter(boolQueryBuilder);
builder.query(queryBuilder);
TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders
.terms("group_name")
.field(aggFiled)
.size(999999)
.order(BucketOrder.aggregation(("avg_grade"), order))
.subAggregation(AggregationBuilders
.avg("avg_grade")
.script(new Script("(doc['completeTime'].value.toInstant().toEpochMilli() - doc['createTime'].value.toInstant().toEpochMilli())")))
.subAggregation(new BucketSortPipelineAggregationBuilder("bucket_sort", null).from(form).size(size));
builder.from(0);
builder.size(0);
builder.fetchSource(false);
SearchRequest request = new SearchRequest(ElasticsearchConstant.EMSUSERBILLPROCESS_INDEX);
request.source(builder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
ParsedTerms parsedTerms = (ParsedTerms) response.getAggregations().asMap().get("group_name");
List<Terms.Bucket> buckets = (List<Terms.Bucket>) parsedTerms.getBuckets();
List<StatisticsData> li = buckets.stream().map(u -> {
StatisticsData s = new StatisticsData();
s.setKey(String.valueOf(u.getKey()));
s.setDoc_count(String.valueOf(u.getDocCount()));
return s;
}).collect(Collectors.toList());
return li;
}