import datetime

from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Name of the index that every query below targets (placeholder value).
gol_index_name = "索引名称"

# Shared client used by all examples; host and credentials are placeholders.
es_client = Elasticsearch(
    ["ip"],
    http_auth=('用户名', '用户名密码'),
    port=9200,
)
# Approach 1: fetch up to 10,000 documents with one plain search request.
es_search_options = {"query": {"match_all": {}}, "size": 10000}
s1 = datetime.datetime.now()
es_result = es_client.search(index=gol_index_name, doc_type="vulnerability", body=es_search_options)
# Collect each hit's _source dict as one list element. (`list += dict`
# would extend the list with the dict's keys instead of the document.)
fianl_result = [hit["_source"] for hit in es_result["hits"]["hits"]]
print("aaaa", datetime.datetime.now() - s1)
# Approach 2: from/size pagination -- 10 requests of 1,000 documents each.
page_size = 1000
es_search_options = {"query": {"match_all": {}}, "from": 0, "size": page_size}
s1 = datetime.datetime.now()
fianl_result1 = []
for i in range(10):
    # `from` must advance by a whole page (the request's `size`) so that
    # consecutive pages do not overlap.
    es_search_options["from"] = i * page_size
    es_result = es_client.search(index=gol_index_name, doc_type="vulnerability", body=es_search_options)
    # Keep only the document bodies, matching the other approaches.
    fianl_result1.extend(hit["_source"] for hit in es_result["hits"]["hits"])
print("bbbbb", datetime.datetime.now() - s1)
# Approach 3: scroll API -- pages of 100 documents, at most 100 pages
# (10,000 documents) so the timing is comparable with the approaches above.
scroll_id_op = {"query": {"match_all": {}}}
s1 = datetime.datetime.now()
page = es_client.search(index=gol_index_name, doc_type="vulnerability", body=scroll_id_op, scroll="1m", size=100,
                        timeout="3m")
sid = page['_scroll_id']
docs = page['hits']['hits']
# Drive the loop from the number of hits actually returned rather than
# hits.total: newer Elasticsearch versions return hits.total as an object,
# which cannot be compared with an int.
scroll_size = len(docs)
final_result = [x['_source'] for x in docs]
i = 1  # pages fetched so far, counting the initial search
try:
    while scroll_size > 0 and i < 100:
        page = es_client.scroll(scroll_id=sid, scroll='2m')
        # The scroll id may change between requests; always use the latest one.
        sid = page['_scroll_id']
        docs = page['hits']['hits']
        scroll_size = len(docs)
        final_result += [x['_source'] for x in docs]
        i += 1
finally:
    # Release the server-side scroll context instead of waiting for it to expire.
    es_client.clear_scroll(scroll_id=sid)
print("ccccc", datetime.datetime.now() - s1)
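# The examples above fetch only 10,000 documents, which a plain search can handle. Elasticsearch limits a plain search to 10,000 results by default (index.max_result_window); beyond that limit plain search no longer applies, although the limit itself can be raised. When it is sufficient, plain search alone is the fastest option.
# When there are more than 10,000 documents, or more than the configured maximum for search, the following two approaches can be used instead; note that neither of them supports sorting or web-style (random-access) pagination: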
# Option A: open a scroll context with search(); subsequent pages are then
# fetched with es_client.scroll(scroll_id=page['_scroll_id'], ...).
page = es_client.search(index=gol_index_name, doc_type="vulnerability", body=scroll_id_op, scroll="1m", size=100,
                        timeout="3m")
# Option B: helpers.scan wraps the scroll API and returns an iterator over
# the hits.
es_result = helpers.scan(
    client=es_client,
    # Use the plain match_all body: a scroll request must not carry a
    # non-zero `from`, which es_search_options still holds from the
    # from/size example.
    query=scroll_id_op,
    scroll='1m',
    index=gol_index_name,
    doc_type="vulnerability",
    timeout='1m',
)