# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
import requests
import os
import datetime
# Everything below is keyed on yesterday's date: it selects both the daily
# Elasticsearch index and the folder the downloaded files are written to.
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)

# Index names use dot-separated dates (YYYY.MM.DD) instead of ISO dashes.
conv_time = yesterday.isoformat().replace('-', '.')

# Cluster nodes handed to the Elasticsearch client.
_es_hosts = [
    '192.168.10.147:9200',
    '192.168.10.148:9200',
    '192.168.10.149:9200',
    '192.168.10.150:9200',
    '192.168.10.141:9200',
]
conn = Elasticsearch(_es_hosts)
es_index = 'cr-stat-{}'.format(conv_time)

# Destination folder on the network share, one sub-folder per day
# (the share name means "yesterday's data").
dstPath = r'\\192.168.10.82\昨天数据\{}'.format(yesterday)
if not os.path.exists(dstPath):
    os.makedirs(dstPath)
def down_load(plat, lang):
    """Download every file referenced by the matching documents in ``es_index``.

    Searches the module-level ``es_index`` through ``conn`` for documents
    whose ``languagename.keyword`` equals *lang*, whose ``platformid.keyword``
    equals *plat* and whose ``callsecs`` is between 60 and 20000 (inclusive).
    The whole result set is walked with the scroll API, and every URL found in
    a document's ``files`` mapping is saved into ``dstPath`` under the URL's
    base name.

    Args:
        plat: Platform id to match.
        lang: Language name to match.

    Returns:
        None. Files are written to ``dstPath`` as a side effect. Errors while
        scrolling or downloading are printed; a failed download only skips
        that one file.
    """
    total_query = {
        "from": 0,
        "size": 1000,
        "query": {
            "bool": {
                "must": [
                    {"term": {"languagename.keyword": {"value": lang}}},
                    {"term": {"platformid.keyword": {"value": plat}}},
                    {"range": {"callsecs": {"gte": 60, "lte": 20000}}},
                ]
            }
        },
    }
    page = conn.search(index=es_index, body=total_query, scroll='10m', size=1000)
    scroll_id = page.get('_scroll_id')
    try:
        # The initial search response already carries the first page of hits,
        # so handle the current page *before* asking for the next one. Loop
        # until a page comes back empty instead of deriving a page count from
        # hits.total, whose type differs between Elasticsearch versions.
        while page['hits']['hits']:
            for hit in page['hits']['hits']:
                files = hit['_source'].get('files') or {}
                for url in files.values():
                    _download_file(url)
            # Always continue from the most recently returned scroll id.
            scroll_id = page['_scroll_id']
            page = conn.scroll(scroll_id=scroll_id, scroll='5m')
    except Exception as e:
        # Best-effort job: report the problem and stop paging.
        print(e)
    finally:
        # Release the server-side scroll context instead of letting it
        # linger until its keep-alive expires.
        if scroll_id:
            try:
                conn.clear_scroll(scroll_id=scroll_id)
            except Exception as e:
                print(e)


def _download_file(url):
    """Fetch *url* and store it in ``dstPath``; print and skip on failure."""
    file_path = os.path.join(dstPath, os.path.basename(url))
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        response = requests.get(url, timeout=60)
        # Do not save an HTTP error page under the file's name.
        response.raise_for_status()
        with open(file_path, 'wb') as ff:
            ff.write(response.content)
    except (requests.RequestException, OSError) as e:
        print(e)
if __name__ == "__main__":
    # Script entry point: download the files for platform id '600' and
    # language name '汉语'.
    down_load(plat='600', lang='汉语')
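# Note: because the results are read through the scroll API, this script is not subject to Elasticsearch's default query limit of 10,000 hits and can retrieve as much data as the index holds.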
# (Page footer from the source article: "Latest recommended article published on 2024-07-12 12:16:53".)