# Python: iterate over an entire ES index with the scroll_id cursor and write every hit to a txt file
#download all hits from index of es
#use scroll_id
from elasticsearch import Elasticsearch
import json
# Shared ES client (default local node) and the default query body:
# match_all, with _source filtered down to the three fields the export writes.
es = Elasticsearch(["localhost:9200"])
body = {
    "_source": ["fileName", "fullPath", "HashFeature"],
    "query": {
        "match_all": {}
    },
}
def get_search_result(es, index, doc_type, scroll='5m', timeout='1m', size=1000, body=None):
    """Fetch ALL hits from an ES index/doc_type using the scroll API.

    Parameters:
        es: Elasticsearch client instance (anything exposing .search/.scroll).
        index: name of the index to query.
        doc_type: mapping type to query.
        scroll: how long ES keeps the scroll context alive between requests.
        timeout: per-request search timeout.
        size: page size of each scroll batch (was previously hard-coded to
            1000 inside the loop, which broke pagination for any other size).
        body: query body; when omitted, a match_all restricted to the three
            fields this module exports (same default as before, but built
            per call instead of sharing a module-level mutable dict).

    Returns:
        list of hit dicts, each carrying a '_source' sub-dict.
    """
    if body is None:
        body = {
            "_source": ["fileName", "fullPath", "HashFeature"],
            "query": {"match_all": {}},
        }
    queryData = es.search(
        index=index,
        doc_type=doc_type,
        scroll=scroll,
        timeout=timeout,
        size=size,
        body=body,
    )
    mdata = queryData.get("hits").get("hits")
    if not mdata:
        print('empty')
    scroll_id = queryData["_scroll_id"]
    # Keep scrolling until ES returns an empty page.  The original code
    # looped int(total/1000) times — wrong whenever size != 1000, one
    # wasted request on exact multiples — and never refreshed scroll_id
    # from the responses.
    while True:
        res = es.scroll(scroll_id=scroll_id, scroll=scroll)
        page = res["hits"]["hits"]
        if not page:
            break
        mdata = mdata + page
        # ES may rotate the cursor between batches; always use the latest.
        scroll_id = res.get("_scroll_id", scroll_id)
    return mdata
if __name__ == "__main__":
    result = get_search_result(es, 'dh02_20180227', 'cc06_design')
    # Write one comma-separated line per hit: fullPath,HashFeature,fileName.
    # The original code called `f.close` without parentheses, so the file
    # handle was never closed/flushed; `with` guarantees cleanup.
    with open('G:/lab614/12000.txt', 'w') as f:
        for item in result:
            src = item['_source']
            f.write(src['fullPath'] + ',' + src['HashFeature'] + ',' + src['fileName'])
            f.write('\n')