from elasticsearch import Elasticsearch
# Pull every document matching `body` from index "xxx" via the scroll API
# and collect each hit's `_source` into `esdata`.
# `ES_URL` and `body` are expected to be defined before this point.
es_client = Elasticsearch(hosts=ES_URL, timeout=600)
esdata = []
es_scroll_size = 200  # number of hits requested per scroll page
scroll_tm = "10s"  # keep-alive for the scroll context, renewed on every page

query_data = es_client.search(index="xxx", scroll=scroll_tm, size=es_scroll_size, body=body)
scroll_id = query_data.get("_scroll_id")
try:
    hits = (query_data.get("hits") or {}).get("hits")
    while hits:
        esdata.extend(hit["_source"] for hit in hits)
        scroll_res = es_client.scroll(scroll_id=scroll_id, scroll=scroll_tm)
        # The scroll id may change between requests; always continue with
        # the most recently returned one instead of the initial id.
        scroll_id = scroll_res.get("_scroll_id") or scroll_id
        hits = (scroll_res.get("hits") or {}).get("hits")
finally:
    # Release the server-side search context as soon as we are done rather
    # than leaving it open until the keep-alive expires.
    # NOTE(review): presumably this raises if the context has already
    # expired on the server - confirm against the client version in use.
    if scroll_id:
        es_client.clear_scroll(scroll_id=scroll_id)
# 注意:scroll 查询的数据是首次查询时刻的快照,以搜索上下文的形式保存在 Elasticsearch 服务端(内存)中,保留时长默认最多不超过 24h;因此滚动期间新写入或更新的数据不会出现在结果里。