指标(Metrics)
桶能够让我们对文档进行有意义的划分,但是最终我们还是需要对每个桶中的文档进行某种指标计算。分桶是达到最终目的的手段:提供了对文档进行划分的方法,从而让你能够计算需要的指标。
多数指标仅仅是简单的数学运算(比如,min,mean,max以及sum),它们使用文档中的值进行计算。在实际应用中,指标能够让你计算例如平均薪资,最高出售价格,或者第95百分位的查询延迟。
将两者结合起来
一个聚合就是一些桶和指标的组合。一个聚合可以只有一个桶,或者一个指标,或者每样一个。在桶中甚至可以有多个嵌套的桶。比如,我们可以将文档按照其所属国家进行分桶,然后对每个桶计算其平均薪资(一个指标)。
因为桶是可以嵌套的,我们能够实现一个更加复杂的聚合操作:
- 将文档按照国家进行分桶。(桶)
- 然后将每个国家的桶再按照性别分桶。(桶)
- 然后将每个性别的桶按照年龄区间进行分桶。(桶)
- 最后,为每个年龄区间计算平均薪资。(指标)
此时,就能够得到每个<国家,性别,年龄>组合的平均薪资信息了。它可以通过一个请求,一次数据遍历来完成。
Java API 示例
现有索引数据:
index:school
type:student --------------------------------------------------- {"grade":"1", "class":"1", "name":"xiao 1"} {"grade":"1", "class":"1", "name":"xiao 2"} {"grade":"1", "class":"2", "name":"xiao 3"} {"grade":"1", "class":"2", "name":"xiao 4"} {"grade":"1", "class":"2", "name":"xiao 5"}
Java分组统计年级和班级学生个数,如SQL: SELECT grade,class,count(1) FROM student GROUP BY grade,class;
- package test;
-
- import java.util.Iterator;
- import java.util.Map;
-
- import org.elasticsearch.action.search.SearchRequestBuilder;
- import org.elasticsearch.action.search.SearchResponse;
- import org.elasticsearch.action.search.SearchType;
- import org.elasticsearch.search.aggregations.Aggregation;
- import org.elasticsearch.search.aggregations.AggregationBuilders;
- import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
- import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
- import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
- import org.junit.Test;
-
- import utils.NesUtils;
-
- public class TestAggregation
- {
- @Test
- public void testAggregation()
- {
- SearchRequestBuilder srb = NesUtils.getSearcher("school");
- srb.setTypes("student");
- srb.setSearchType(SearchType.COUNT);
-
- TermsBuilder gradeTermsBuilder = AggregationBuilders.terms("gradeAgg").field("grade");
- TermsBuilder classTermsBuilder = AggregationBuilders.terms("classAgg").field("class");
-
- gradeTermsBuilder.subAggregation(classTermsBuilder);
-
- srb.addAggregation(gradeTermsBuilder);
-
- SearchResponse sr = srb.execute().actionGet();
-
- Map<String, Aggregation> aggMap = sr.getAggregations().asMap();
-
- StringTerms gradeTerms = (StringTerms) aggMap.get("gradeAgg");
-
- Iterator<Bucket> gradeBucketIt = gradeTerms.getBuckets().iterator();
-
- while(gradeBucketIt.hasNext())
- {
- Bucket gradeBucket = gradeBucketIt.next();
- System.out.println(gradeBucket.getKey() + "年级有" + gradeBucket.getDocCount() +"个学生。");
-
- StringTerms classTerms = (StringTerms) gradeBucket.getAggregations().asMap().get("classAgg");
- Iterator<Bucket> classBucketIt = classTerms.getBuckets().iterator();
-
- while(classBucketIt.hasNext())
- {
- Bucket classBucket = classBucketIt.next();
- System.out.println(gradeBucket.getKey() + "年级" +classBucket.getKey() + "班有" + classBucket.getDocCount() +"个学生。");
- }
- System.out.println();
- }
-
- }
- }
运行完成输出结果
---------------------------------------------------
1年级有5个学生。 1年级2班有3个学生。 1年级1班有2个学生。
实现一个SQL: SELECT sum(field) from table group by field2
使用:AggregationBuilders.sum("name").field("field");
- public static void searchTest() throws IOException {
- TermsBuilder companyNameAgg = AggregationBuilders.terms("companyName").field("companyName").size(10);
- SumBuilder companyNameAggSum = AggregationBuilders.sum("companyNameSum").field("cvcount");
- companyNameAgg.subAggregation(companyNameAggSum);
- SearchRequestBuilder searchBuilder = ElasticClientFactory.getClient().prepareSearch(indexname).
- setTypes(typeName).addAggregation(companyNameAgg);
- SearchResponse searchResponse = searchBuilder.execute().actionGet();
- Terms terms = searchResponse.getAggregations().get("companyName");
- List<Terms.Bucket> buckets = terms.getBuckets();
- List<String> list = Lists.newArrayList();
- for (Terms.Bucket bucket : buckets) {
- InternalSum internalSum = bucket.getAggregations().get("companyNameSum");
- System.out.println(bucket.getKeyAsString() + "\t" + bucket.getDocCount() + "\t"+internalSum.getValue());
- }
- System.out.println("done");
- }
案例2
PUT /company
{
"mappings": {
"employee": {
"properties": {
"age": {
"type": "long"
},
"country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
},
"join_date": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"position": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"salary": {
"type": "long"
}
}
}
}
}
GET /company/employee/_search
{
"size": 0,
"aggs": {
"group_by_country": {
"terms": {
"field": "country"
},
"aggs": {
"group_by_join_date": {
"date_histogram": {
"field": "join_date",
"interval": "year"
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
}
}
}
}
}
/**
 * Nested-aggregation demo against the "company" index:
 * terms on country -> yearly date_histogram on join_date -> avg(salary).
 * Prints each country with its doc count, then each year with its doc count
 * and average salary.
 */
public class EmployeeAggrApp {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        Settings settings = Settings.builder()
            .put("cluster.name", "elasticsearch")
            .build();
        // Fix: try-with-resources guarantees the transport client is closed even
        // when the search or result handling throws — the original only closed it
        // on the success path (and suppressed the "resource" warning instead).
        try (TransportClient client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("localhost"), 9300))) {
            SearchResponse searchResponse = client.prepareSearch("company")
                .addAggregation(AggregationBuilders.terms("group_by_country").field("country")
                    .subAggregation(AggregationBuilders
                        .dateHistogram("group_by_join_date")
                        .field("join_date")
                        .dateHistogramInterval(DateHistogramInterval.YEAR)
                        .subAggregation(AggregationBuilders.avg("avg_salary").field("salary")))
                )
                .execute().actionGet();
            Map<String, Aggregation> aggrMap = searchResponse.getAggregations().asMap();
            StringTerms groupByCountry = (StringTerms) aggrMap.get("group_by_country");
            for (Bucket countryBucket : groupByCountry.getBuckets()) {
                System.out.println(countryBucket.getKey() + ":" + countryBucket.getDocCount());
                // Per-country yearly histogram, keyed by the sub-aggregation name.
                Histogram joinDateHistogram = (Histogram) countryBucket.getAggregations().asMap().get("group_by_join_date");
                for (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket yearBucket : joinDateHistogram.getBuckets()) {
                    System.out.println(yearBucket.getKey() + ":" + yearBucket.getDocCount());
                    Avg avg = (Avg) yearBucket.getAggregations().asMap().get("avg_salary");
                    System.out.println(avg.getValue());
                }
            }
        }
    }
}