**ES对多个字段聚合,select A,B,**COUNT(*)from table group by A,B
假设有下表
NAME SEX PROF
李诚 男 副教授
张旭 男 讲师
王萍 女 助教
刘冰 女 助教
要查询select SEX,PROF,COUNT(*) from table group by SEX,PROF
1、正确的答案:
修改elasticsearch.yml配置文件,添加下面两个配置,重启es集群
script.engine.groovy.inline.aggs: on
script.engine.groovy.inline.search: on
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"sexprof": {
"terms": {
"script": {
"inline": "doc['SEX.keyword'].value +'-split-'+ doc['PROF.keyword'].value "
}
}
}
}
}
JavaAPI
Script script = new Script(ScriptType.INLINE, “groovy”, “doc[’ SEX .keyword’].value+‘-split-’+doc[’ PROF .keyword’].value”, new HashMap<String, Object>());
TermsAggregationBuilder callTypeTeamAgg =AggregationBuilders.terms(“sexprof”).script(script);
这样得到的sex,prof两列是一起返回的,中间通过"-split-"分开,拿到结果后自行处理,结果大概像下面的(省略了没用的信息):
{
"aggregations": {
"sexprof": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 379,
"buckets": [
{
"key": "女-split-助教",
"doc_count": 2
},
{
"key": "男-split-讲师",
"doc_count": 1
},
{
"key": "男-split-教授",
"doc_count": 1
}
]
}
}
}
2、错误的答案:
{
"query": {
"match_all": {}
},
"aggs": {
"sex": {
"terms": {
"field": "SEX.keyword"
}
},
"prof": {
"terms": {
"field": "PROF.keyword'"
}
}
}
}
拿到的结果是大概像这样的(省略了没用的信息),分开统计了,这明显不是我们想要的
{
"aggregations": {
"sex": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "女",
"doc_count": 2
},
{
"key": "男",
"doc_count": 2
}
]
},
"prof": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "副教授",
"doc_count": 1
},
{
"key": "讲师",
"doc_count": 1
},
{
"key": "助教",
"doc_count": 1
}
]
}
}
}