Facet【聚类】,在Elasticsearch搜索结果的基础上,对某个字段按照内容的不同进行分类统计。
package bio_rdf.impl.fieldfacet;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.terms.TermsFacet;
//
/**
 * Small command-line demo of a terms facet using the Elasticsearch transport client.
 *
 * <p>It runs a query-string search for a fixed author against the
 * {@code references.authors} field, asks for a terms facet named {@code "f"} on
 * {@code seqs.organism}, and prints every returned facet entry (term and count)
 * to standard output. The search hits themselves are not printed.
 */
public class ESFieldFacet {

    /** Value of the {@code cluster.name} setting used to build the client. */
    private static final String CLUSTER_NAME = "genbank";
    /** Host and port of the node the transport client connects to. */
    private static final String HOST = "10.0.26.1";
    private static final int TRANSPORT_PORT = 9300;

    /** Index and type the search is restricted to. */
    private static final String INDEX = "test";
    private static final String TYPE = "seqs";

    /** Key under which the terms facet is requested and later looked up. */
    private static final String FACET_NAME = "f";
    /** Field whose terms are counted by the facet. */
    private static final String FACET_FIELD = "seqs.organism";
    /** Maximum number of facet entries requested. */
    private static final int FACET_SIZE = 10;

    /**
     * Connects to the cluster, executes the faceted search and prints the facet entries.
     *
     * @param args command-line arguments; not used
     */
    @SuppressWarnings("deprecation")
    public static void main(String[] args) {
        // establish the client
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("cluster.name", CLUSTER_NAME).build();
        TransportClient client = new TransportClient(settings);
        try {
            client.addTransportAddress(new InetSocketTransportAddress(HOST, TRANSPORT_PORT));

            // The author name is wrapped in double quotes inside the query string.
            QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(
                    new QueryStringQueryBuilder("\"Kapatral,V.\"")
                            .defaultField("references.authors"));

            SearchRequestBuilder searchRequestBuilder = client.prepareSearch(INDEX)
                    .setTypes(TYPE)
                    .setSearchType(SearchType.DEFAULT)
                    .setFrom(0)
                    .setSize(50);

            // attach the query and the terms facet to the request
            searchRequestBuilder.setQuery(queryBuilder).addFacet(
                    FacetBuilders.termsFacet(FACET_NAME).field(FACET_FIELD).size(FACET_SIZE));

            SearchResponse response = searchRequestBuilder.execute().actionGet();

            // Guard against a response without the requested facet instead of
            // failing with a NullPointerException.
            TermsFacet f = response.getFacets() == null
                    ? null
                    : (TermsFacet) response.getFacets().facetsAsMap().get(FACET_NAME);
            if (f == null) {
                System.err.println("Facet '" + FACET_NAME + "' is missing from the search response");
                return;
            }

            for (TermsFacet.Entry entry : f) {
                System.out.println("--------------start------------------");
                System.out.println(entry.getTerm());
                System.out.println(entry.getCount());
                System.out.println("---------------end-----------------");
            }
        } finally {
            // Always release the client, even when the search or result handling throws.
            client.close();
        }
    }
}
result:
--------------start------------------
Fusobacterium nucleatum subsp. vincentii ATCC 49256
302
---------------end-----------------
以上结果出现的前提是:organism字段在录入索引的时候,Elasticsearch不对其进行分词解析(即该字段在mapping中被设置为not_analyzed)。
相反,当Elasticsearch对organism进行分词解析的时候,结果就是下面的内容【因为当ES对字段进行解析的时候,默认调用standard分析器(analyzer):先把字段内容按照一定规则切分成单词,然后再将大写字母变为小写,于是字段值变成了“一袋子”单词,facet统计的是这些单词而不是完整的字段值】
--------------start------------------
vincentii
302
---------------end-----------------
--------------start------------------
subsp
302
---------------end-----------------
--------------start------------------
nucleatum
302
---------------end-----------------
--------------start------------------
fusobacterium
302
---------------end-----------------
--------------start------------------
atcc49256
302
---------------end-----------------