code总结——python连接es,利用scroll遍历查询

 python利用scroll_id游标遍历查询es,并将所有查询结果写入txt

# Download every hit from an Elasticsearch index.
# Pages through the results with the scroll API (scroll_id cursor).
from elasticsearch import Elasticsearch
import json
# Client connected to the Elasticsearch node at localhost:9200.
es = Elasticsearch(["localhost:9200"])

# Default search request: match every document and return only the listed
# fields. Narrow "_source" to a single entry (e.g. ["fileName"]) to fetch
# just one of them.
body = {
    "_source": ["fileName", "fullPath", "HashFeature"],
    "query": {"match_all": {}},
}
def get_search_result(es,index,doc_type,scroll='5m',timeout='1m',size=1000,body=body):
    """Return every hit matching ``body`` by paging with the scroll API.

    Args:
        es: Client object exposing ``search`` and ``scroll`` methods.
        index: Index name forwarded to ``es.search``.
        doc_type: Document type forwarded to ``es.search``.
        scroll: Keep-alive for the scroll context; sent with the initial
            search and with every follow-up scroll request.
        timeout: Timeout forwarded to ``es.search``.
        size: Number of hits requested per page.
        body: Search request body; defaults to the module-level ``body``.

    Returns:
        A list with the raw hit dicts (the ``hits.hits`` entries) of all
        pages, in the order they were received. Empty when nothing matches.
    """
    response = es.search(
        index=index,
        doc_type=doc_type,
        scroll=scroll,
        timeout=timeout,
        size=size,
        body=body,
    )

    page = response["hits"]["hits"]
    if not page:
        print('empty')
        return []

    all_hits = list(page)
    scroll_id = response["_scroll_id"]

    # Keep scrolling until a page comes back empty. This avoids deriving the
    # page count from hits.total (a dict, not an int, on Elasticsearch 7+)
    # and works for any ``size``, not only 1000.
    while page:
        response = es.scroll(scroll_id=scroll_id, scroll=scroll)
        # The server may hand out a new cursor; always use the latest one.
        scroll_id = response.get("_scroll_id", scroll_id)
        page = response["hits"]["hits"]
        all_hits.extend(page)

    return all_hits


if __name__ == "__main__":
    # Fetch every document of the index, then write one comma-separated
    # line per hit: fullPath,HashFeature,fileName
    result = get_search_result(es,'dh02_20180227','cc06_design')

    # The context manager guarantees the file is flushed and closed, even
    # if a hit is missing one of the expected fields and a KeyError is raised.
    with open('G:/lab614/12000.txt','w') as f:
        for item in result:
            source = item['_source']
            f.write(
                source['fullPath'] + ',' + source['HashFeature'] + ','
                + source['fileName'] + '\n'
            )
    

 

  • 7
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值