Elasticsearch 单次搜索默认最多只能返回 10000 条数据(受 index.max_result_window 限制)。如果需要返回全部数据,可以修改该默认配置,或者使用 scroll,或者使用 search after。其中 search after 方案不需要额外的配置和操作,是三者中最合适的解决方案。
使用 HTTP 请求实践
# First page: term query on "name", sorted by name and then _id (both descending); no search_after yet.
GET phonebills/_search
{
  "query": {
    "term": {
      "name": {
        "value": "鲁仲连"
      }
    }
  },
  "size": 10000,
  "sort": [
    { "name": { "order": "desc" } },
    { "_id": { "order": "desc" } }
  ]
}
# Next page: pass the sort values of the last hit returned by the previous request as search_after.
GET phonebills/_search
{
  "query": {
    "term": {
      "name": {
        "value": "鲁仲连"
      }
    }
  },
  "size": 10000,
  "search_after": [
    "鲁仲连",
    "zzzVumsBieuR7aAcDbV-"
  ],
  "sort": [
    { "name": { "order": "desc" } },
    { "_id": { "order": "desc" } }
  ]
}
使用 Java High Level REST Client 实践
/**
 * Pages through every hit of the query built by {@code searchAfter}, collects the
 * {@code _source} map of each hit, then prints every map followed by the total count.
 *
 * <p>Connects to Elasticsearch at {@code http://localhost:9200}.
 *
 * @param args unused
 * @throws IOException if a search request or closing the client fails
 */
public static void main(String[] args) throws IOException {
    // Must match the page size requested by searchAfter: a shorter page means it was the last one.
    final int pageSize = 10000;
    List<Map<String, Object>> data = new ArrayList<>();

    // try-with-resources so the client is closed even when a search request throws.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        // Sentinel that searchAfter interprets as "first page, no search_after clause".
        Object[] sortValues = new Object[] {"start", "start"};
        while (true) {
            SearchHit[] hits = searchAfter(client, sortValues);
            for (SearchHit hit : hits) {
                data.add(hit.getSourceAsMap());
            }
            // Check before indexing the last hit: the page may be empty (no matches at all,
            // or the total is an exact multiple of the page size).
            if (hits.length < pageSize) {
                break;
            }
            sortValues = hits[hits.length - 1].getSortValues();
        }
    }

    for (Map<String, Object> row : data) {
        System.out.println(row);
    }
    System.out.println(data.size() + "-----------------");
}
/**
 * Fetches one page (at most 10000 hits) of documents from the {@code phonebills} index
 * whose {@code name} term equals "鬼谷子", sorted by {@code name} and then {@code _id},
 * both descending.
 *
 * <p>NOTE(review): Elasticsearch documentation advises against sorting on {@code _id} for
 * large result sets; confirm whether a dedicated tiebreaker field is available.
 *
 * @param client  client used to execute the search
 * @param objects sort values of the last hit of the previous page, passed as
 *                {@code search_after}; {@code null}, an empty array, or the legacy
 *                {@code {"start", "start"}} sentinel requests the first page
 * @return the hits of the requested page, possibly empty
 * @throws IOException if the search request fails
 */
public static SearchHit[] searchAfter(RestHighLevelClient client, Object[] objects) throws IOException {
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(QueryBuilders.termQuery("name", "鬼谷子"));
    sourceBuilder.size(10000);
    sourceBuilder.sort("name", SortOrder.DESC);
    sourceBuilder.sort("_id", SortOrder.DESC);

    // The first page carries no search_after clause. The original check tested objects[1]
    // twice; both sentinel elements are compared here, and null/empty input is tolerated.
    boolean firstPage = objects == null
            || objects.length == 0
            || (objects.length == 2
                    && "start".equals(String.valueOf(objects[0]))
                    && "start".equals(String.valueOf(objects[1])));
    if (!firstPage) {
        sourceBuilder.searchAfter(objects);
    }

    SearchRequest searchRequest = new SearchRequest();
    searchRequest.indices("phonebills");
    searchRequest.source(sourceBuilder);
    SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
    return response.getHits().getHits();
}
Windows 单机版测试结果

| 数据总量 | 符合条件数据量 | 时间消耗(m:s) |
| --- | --- | --- |
| 1000000 | 78000 | 14.43 |
| 10000000 | 780000 | 1:17.15 |