1 - distinct
SELECT DISTINCT(user_id) FROM table WHERE user_id_type = 3;
{
"query": {
"term": {
"user_id_type": 3
}
},
"collapse": {
"field": "user_id"
}
}
结果
{
...
"hits": {
"hits": [
{
"_index": "es_qd_mkt_visitor_packet_dev_v1_20180621",
"_type": "ad_crowd",
"_source": {
"user_id": "wx2af8414b502d4ca2_oHtrD0Vxv-_8c678figJNHmtaVQQ",
"user_id_type": 3
},
"fields": {
"user_id": [
"wx2af8414b502d4ca2_oHtrD0Vxv-_8c678figJNHmtaVQQ"
]
}
}
]
}
}
总结:使用collapse字段后,查询结果中[hits]中会出现[fields]字段,其中包含了去重后的user_id
2 - count + distinct
SELECT COUNT(DISTINCT(user_id)) FROM table WHERE user_id_type = 3;
{
"query": {
"term": {
"user_id_type": 3
}
},
"aggs": {
"count": {
"cardinality": {
"field": "user_id"
}
}
}
}
结果
{
...
"hits": {
...
},
"aggregations": {
"count": {
"value": 121
}
}
}
总结:aggs中cardinality的字段代表需要distinct的字段
3 - count + group by
SELECT COUNT(user_id) FROM table GROUP BY user_id_type;
{
"aggs": {
"user_type": {
"terms": {
"field": "user_id_type"
}
}
}
}
结果
{
...
"hits": {
...
},
"aggregations": {
"user_type": {
...
"buckets": [
{
"key": 4,
"doc_count": 1220
},
{
"key": 3,
"doc_count": 488
}
]
}
}
}
总结:aggs中terms的字段代表需要gruop by的字段
4 - count + distinct + group by
SELECT COUNT(DISTINCT(user_id)) FROM table GROUP BY user_id_type;
{
"aggs": {
"user_type": {
"terms": {
"field": "user_id_type"
},
"aggs": {
"count": {
"cardinality": {
"field": "user_id"
}
}
}
}
}
}
结果
{
...
"hits": {
...
},
"aggregations": {
"user_type": {
...
"buckets": [
{
"key": 4,
"doc_count": 1220, //去重前数据1220条
"count": {
"value": 276 //去重后数据276条
}
},
{
"key": 3,
"doc_count": 488, //去重前数据488条
"count": {
"value": 121 //去重后数据121条
}
}
]
}
}
}
5 - count + distinct + group by + where
SELECT COUNT(DISTINCT(user_id)) FROM table WHERE user_id_type = 2 GROUP BY user_id;
总结:对于既有group by又有distinct的查询要求,需要在aggs中嵌套子aggs
6 - gourp by
"aggs": {
"all_province": {
"terms": {
"field": "lookupResult.group_worker.keyword"
}
}
7 - 注意事项
collapse关键字
转载:https://www.jianshu.com/p/62bed9cc8349