Elasticsearch不支持聚合后分页
1. 分组
//select max(TEM_Max) from XX group by Station_Id_d
POST /XX/_search
{
"size": 0,
"aggs": {
"max_score": {
"terms": {
"field": "Station_Id_d"
},
"aggs": {
"max_score": {
"max": {
"field": "TEM_Max"
}
}
}
}
}
}
2、增加过滤
//select max(TEM_Max) from XX where time > 315504000000 and time < 1293811199000 group by Station_Id_d
POST /XX/_search
{"size": 0,
"query" : {
"bool" : {
"filter" : [
{ "range" : {
"time" : {
"gt" : 315504000000,
"lt" : 1293811199000
}
} }
]
}
},
"aggs" : {
"max_score": {
"terms": {
"field": "Station_Id_d"
},
"aggs": {
"max_score": {
"max": {
"field": "TEM_Max"
}
}
}
}
}
}
3、多值聚合
//select max(TEM_Max),min(TEM_Min),avg(TEM_Avg) from XX where time > 315504000000 and time < 1293811199000 group by Station_Id_d
POST /XX/_search
{"size": 0,
"query" : {
"bool" : {
"filter" : [
{ "range" : {
"time" : {
"gte" : 315504000000,
"lte" : 1293811199000
}
} }
]
}
},
"aggs" : {
"avg_score": {
"terms": {
"field": "Station_Id_d"
},
"aggs": {
"min_score": {
"min": {
"field": "TEM_Min"
}
},
"max_score": {
"max": {
"field": "TEM_Max"
}
}
,
"avg_score": {
"avg": {
"field": "TEM_Avg"
}
}
}
}
}
}
4、聚合默认显示10条
//显示13条
POST /XX/_search
{"size": 0,
"query" : {
"bool" : {
"filter" : [
{ "range" : {
"time" : {
"gte" : 315504000000,
"lte" : 1293811199000
}
}
}
]
}
},
"aggs" : {
"avg_score": {
"terms": {
"field": "Station_Id_d",
"size": 13
},
"aggs": {
"win_avg_score": {
"avg": {
"field": "WIN_S_2mi_Avg"
}
},
"PRS_Min_score": {
"min": {
"field": "PRS_Min"
}
},
"PRS_Max_score": {
"max": {
"field": "PRS_Max"
}
}
}
}
}
}
5、多个要素groupby
select min(A),max(B)… from surf_chn_mul_day where time>XX and time <xxx and Station_Id_d=51223 group by Station_Id_d,Mon,Day
{"size": 0,
"query" : {
"bool" : {
"filter" : [
{ "range" : {
"time" : {
"gte" : 315504000000,
"lte" : 1293811199000
}
}
},{ "range" : {
"Station_Id_d" : {
"gte" : 54645,
"lte" : 54645
}
}
}
]
}
},
"aggs" : {
"avg_score": {
"terms": {
"script": {
"inline": "doc['Station_Id_d'].value +'-'+ doc['Mon'].value+'-'+ doc['Day'].value "
},"size": 1000
},
"aggs": {
"WIN_D_INST_Max_score": {
"max": {
"field": "WIN_D_INST_Max"
}
} ,
"WIN_S_2mi_Avg_score": {
"avg": {
"field": "WIN_S_2mi_Avg"
}
},
"PRS_Min_score": {
"min": {
"field": "PRS_Min"
}
}
}
}
}
}
java代码
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import io.searchbox.client.JestClient;
import io.searchbox.core.Bulk;
import io.searchbox.core.Index;
import io.searchbox.core.Search;
import io.searchbox.core.SearchResult;
import io.searchbox.core.search.aggregation.TermsAggregation;
import io.searchbox.core.search.aggregation.TopHitsAggregation;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class DayAggs365 {
static ObjectMapper mapper = new ObjectMapper();
static DecimalFormat df = new DecimalFormat("#0.00");
public static void dayAgg365(long start, long end, int stationId, String toIndexName) throws IOException {
JestClient client = ConfigData.getJestClient();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
QueryBuilder queryBuilder = QueryBuilders.boolQuery()
.must(QueryBuilders.rangeQuery("time") //对time字段进行范围限定
.gte(start).lt(end)).must(QueryBuilders.termQuery("Station_Id_d", stationId));
searchSourceBuilder.query(queryBuilder);
AggregationBuilder aggregationBuilder =
AggregationBuilders.terms("Station_Id_dAgg").script(new Script("doc['Station_Id_d'].value +'-'+ doc['Mon'].value+'-'+ doc['Day'].value ")).size(1000)
//.script(new Script("doc['Datetime.keyword'].value.substring(5,7)"))
.subAggregation(AggregationBuilders.min("TEM_Min_score").field("TEM_Min"))
.subAggregation(AggregationBuilders.max("TEM_Max_score").field("TEM_Max"))
.subAggregation(AggregationBuilders.avg("TEM_Avg_score").field("TEM_Avg"))
.subAggregation(AggregationBuilders.max("WIN_S_Inst_Max_score").field("WIN_S_Inst_Max"))
.subAggregation(AggregationBuilders.max("WIN_D_INST_Max_score").field("WIN_D_INST_Max"))
.subAggregation(AggregationBuilders.max("WIN_S_Max_score").field("WIN_S_Max"))
.subAggregation(AggregationBuilders.max("WIN_D_S_Max_score").field("WIN_D_S_Max"))
.subAggregation(AggregationBuilders.avg("WIN_S_2mi_Avg_score").field("WIN_S_2mi_Avg"))
.subAggregation(AggregationBuilders.avg("PRE_Time_2020_score").field("PRE_Time_2020"))
.subAggregation(AggregationBuilders.avg("PRE_Time_0808_score").field("PRE_Time_0808"))
.subAggregation(AggregationBuilders.avg("PRE_Time_2008_score").field("PRE_Time_2008"))
.subAggregation(AggregationBuilders.avg("PRE_Time_0820_score").field("PRE_Time_0820"))
.subAggregation(AggregationBuilders.min("PRS_Min_score").field("PRS_Min"))
.subAggregation(AggregationBuilders.max("PRS_Max_score").field("PRS_Max"))
.subAggregation(AggregationBuilders.avg("PRS_Avg_score").field("PRS_Avg"))
.subAggregation(AggregationBuilders.min("RHU_Min_score").field("RHU_Min"))
.subAggregation(AggregationBuilders.avg("RHU_Avg_score").field("RHU_Avg"))
.subAggregation(AggregationBuilders.min("VIS_Min_score").field("VIS_Min"));
searchSourceBuilder.aggregation(aggregationBuilder);
// System.out.println("searchSourceBuilder = " + searchSourceBuilder.toString());
String query = searchSourceBuilder.toString();
Search search = new Search.Builder(query).addIndex("surf_chn_mul_day").addType("surf_chn_mul_day").build();
SearchResult result = client.execute(search);
// System.out.println("result = " + result.getJsonString());
List<TermsAggregation.Entry> nameAgg = result.getAggregations().getTermsAggregation("Station_Id_dAgg").getBuckets();
List<Map<String, Object>> mapList = new ArrayList<>();
for (TermsAggregation.Entry entry : nameAgg) {
Map<String, Object> maps = new HashMap<>();
String[] keys = entry.getKeyAsString().split("-");
maps.put("Station_Id_C", Float.valueOf(keys[0]).intValue()+"");
maps.put("Station_Id_d", Float.valueOf(keys[0]).intValue());
maps.put("Mon",Float.valueOf(keys[1]));
maps.put("Day",Float.valueOf(keys[2]));
maps.put("TEM_Min", Float.valueOf(df.format(entry.getAvgAggregation("TEM_Min_score").getAvg())));
maps.put("TEM_Max", Float.valueOf(df.format(entry.getAvgAggregation("TEM_Max_score").getAvg())));
maps.put("TEM_Avg", Float.valueOf(df.format(entry.getAvgAggregation("TEM_Avg_score").getAvg())));
maps.put("WIN_S_Inst_Max", Float.valueOf(df.format(entry.getAvgAggregation("WIN_S_Inst_Max_score").getAvg())));
// maps.put("WIN_D_INST_Max",Float.valueOf(df.format(entry.getAvgAggregation("WIN_D_INST_Max_score").getAvg())));
maps.put("WIN_S_Max", Float.valueOf(df.format(entry.getAvgAggregation("WIN_S_Max_score").getAvg())));
// maps.put("WIN_D_S_Max",Float.valueOf(df.format(entry.getAvgAggregation("WIN_D_S_Max_score").getAvg())));
maps.put("WIN_S_2mi_Avg", Float.valueOf(df.format(entry.getAvgAggregation("WIN_S_2mi_Avg_score").getAvg())));
maps.put("PRE_Time_2020", Float.valueOf(df.format(entry.getAvgAggregation("PRE_Time_2020_score").getAvg())));
maps.put("PRE_Time_0808", Float.valueOf(df.format(entry.getAvgAggregation("PRE_Time_0808_score").getAvg())));
maps.put("PRE_Time_2008", Float.valueOf(df.format(entry.getAvgAggregation("PRE_Time_2008_score").getAvg())));
maps.put("PRE_Time_0820", Float.valueOf(df.format(entry.getAvgAggregation("PRE_Time_0820_score").getAvg())));
maps.put("PRS_Min", Float.valueOf(df.format(entry.getAvgAggregation("PRS_Min_score").getAvg())));
maps.put("PRS_Max", Float.valueOf(df.format(entry.getAvgAggregation("PRS_Max_score").getAvg())));
maps.put("PRS_Avg", Float.valueOf(df.format(entry.getAvgAggregation("PRS_Avg_score").getAvg())));
maps.put("RHU_Min", Float.valueOf(df.format(entry.getAvgAggregation("RHU_Min_score").getAvg())));
maps.put("RHU_Avg", Float.valueOf(df.format(entry.getAvgAggregation("RHU_Avg_score").getAvg())));
maps.put("VIS_Min", Float.valueOf(df.format(entry.getAvgAggregation("VIS_Min_score").getAvg())));
mapList.add(maps);
}
System.out.println(new Gson().toJson(mapList));
Bulk.Builder bulk = new Bulk.Builder();
for (Map<String, Object> record : mapList) {
String id = record.get("Station_Id_d").toString()+"-"+record.get("Mon").toString()+"-"+record.get("Day").toString();
Index index = new Index.Builder(record).id(id).index(toIndexName).type(toIndexName).build();
bulk.addAction(index);
}
try {
ConfigData.getJestClient().execute(bulk.build());
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) throws IOException {
dayAgg365(315504000000l, 1293811199000l, 54619, "surf_chn_mul_day_his");
}
}
支持group by substr(‘字段’,5,6)
#Datetime 2016-05-01 00:00:00
"inline": "doc['Station_Id_d'].value +'-'+ doc['Datetime.keyword'].value.substring(5,7)"
6、http方式聚合
"trackTime": {
"type": "date",
"ignore_malformed": true,
"format": "yyyy-MM-dd HH:mm:ss"
}
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"trackTime": {
"gte": "2020-05-26 10:00:00",
"lte": "2020-05-26 20:00:00"
}
}
}
]
}
},
"aggs": {
"sum_score": {
"terms": {
"script": {
"inline": "doc['username.keyword'].value +'-'+ doc['uri.keyword'].value"
}
},
"aggs": {
"sum_score": {
"sum": {
"field": "_score"
}
}
}
}
}
}
注意,若时间
"inline": "doc['stationId'].value+'-'+doc['observeTime'].value.toString('MM')"
"inline": "doc['stationId'].value+'-'+doc['observeTime'].value.toString('yyyy-MM-dd HH:mm:ss')"
long转时间
"inline": "new Date(doc['time'].value)"
SearchResult result = xxx;
List<TermsAggregation.Entry> nameAgg = result.getAggregations().getTermsAggregation("sum_score").getBuckets();
return nameAgg.stream().collect(Collectors.groupingBy(f -> f.getKey().split("-")[0],
Collectors.toMap(v -> v.getKey().split("-")[1], v -> v.getCount())));
7、时间聚合
{
"query": {
"bool": {
"must": [
{
"range": {
"time": {
"gte": 331228800000,
"lte": 1596211200000
}
}
}
]
}
},
"size": 0,
"aggs": {
"seasonAgg": {
"date_histogram": {
"field": "time",
"interval": "1q"
},
"aggs": {
"PRS_Avg_score": {
"avg": {
"field": "PRS_Avg"
}
}
}
}
}
}
year(1y)年
quarter(1q)季度
month(1M)月份
week(1w)星期
day(1d)天
hour(1h)小时
minute(1m)分钟
second(1s)秒
8、聚合count
{
"query": {
"match": {
"stationId": "57418"
}
},
"aggs": {
"windAggs": {
"terms": {
"script": {
"source": "doc['observeMonthDay'].value.substring(0,2)"
}
}
}
}
}
}
{
"query": {
"match": {
"stationId": "57418"
}
},
"aggs": {
"windAggs": {
"terms": {
"field": "Mon"
}
}
}
}
}
AggregationBuilder aggregationBuilder =
AggregationBuilders.terms("windAgg")
.script(new Script("doc['observeMonthDay'].value.substring(0,2)"))
// .field("Mon")
.size(366);
Elasticsearch要实现聚合后分页,该怎么办?
全量聚合,size设置为: 2147483647。
ES5.X/6.X版本设置为2147483647 ,它等于2^31-1,
是32位操作系统中最大的符号型整型常量;ES1.X 2.X版本设置为0。