# 目前经验:折叠(collapse)只能用于筛选数据,不能进行聚合;如需聚合,需自行在内存中完成。
from elasticsearch import Elasticsearch
def query_2coll(index_name, client=None):
    """Return the ``_source`` of every hit after collapsing on ``subject_id``.

    Sends a ``match_all`` search (``size`` 10000) whose hits are collapsed on
    the ``subject_id`` field. The request also asks for an inner-hits section
    named ``group`` (up to 5 documents, sorted by ``group`` ascending); those
    inner hits are requested but not read here - only each top-level hit's
    ``_source`` is printed and collected.

    Args:
        index_name: Name of the index to search.
        client: Object exposing ``search(index=..., doc_type=..., body=...)``.
            When omitted, the module-level ``es`` client is used.

    Returns:
        list: The ``_source`` dicts of the returned hits. Best effort: if the
        search (or reading the response) raises, the error is printed and
        whatever was collected so far - possibly nothing - is returned.
    """
    body = {
        'size': 10000,
        "query": {
            "match_all": {}
        },
        "collapse": {
            "field": "subject_id",
            "inner_hits": {
                "name": "group",
                "size": 5,
                "sort": [{"group": "asc"}]
            },
            "max_concurrent_group_searches": 4
        }
    }
    datas = []
    try:
        if client is None:
            # Fall back to the client the script entry point binds globally.
            client = es
        response = client.search(index=index_name, doc_type='koala-index', body=body)
        for hit in response['hits']['hits']:
            source = hit['_source']
            # .get() keeps a document that lacks one of these fields from
            # aborting the loop and truncating the result.
            print(source.get('id'), source.get('group'), source.get('subject_id'))
            datas.append(source)
    except Exception as e:
        # Deliberately best effort: report the failure and return what we have.
        print('query_2coll error', e)
    return datas
def query(index_name, client=None):
    """Return the ``_source`` of every hit after collapsing on ``group``.

    Sends a ``match_all`` search (``size`` 10000) sorted by ``group``
    ascending and collapsed on the ``group`` field. A ``terms`` aggregation
    named ``agg_sex`` over ``subject_id`` is requested as well, but its
    buckets are not read here - only each hit's ``_source`` is printed and
    collected.

    Args:
        index_name: Name of the index to search.
        client: Object exposing ``search(index=..., doc_type=..., body=...)``.
            When omitted, the module-level ``es`` client is used.

    Returns:
        list: The ``_source`` dicts of the returned hits. Best effort: if the
        search (or reading the response) raises, the error is printed and
        whatever was collected so far - possibly nothing - is returned.
    """
    body = {
        'size': 10000,
        "query": {
            "match_all": {}
        },
        "sort": [
            {
                "group": {
                    "order": "asc"
                }
            }
        ],
        # The collapse clause takes "field" (plus optional "inner_hits" and
        # "max_concurrent_group_searches"). A nested "terms" object that used
        # to sit here is not a collapse option, so it was removed; bucketing
        # by subject_id is what the "aggs" section below is for.
        "collapse": {
            "field": "group"
        },
        "aggs": {
            "agg_sex": {
                "terms": {
                    "field": "subject_id", 'size': 10000
                }
            }
        }
    }
    datas = []
    try:
        if client is None:
            # Fall back to the client the script entry point binds globally.
            client = es
        response = client.search(index=index_name, doc_type='koala-index', body=body)
        for hit in response['hits']['hits']:
            source = hit['_source']
            # .get() keeps a document that lacks one of these fields from
            # aborting the loop and truncating the result.
            print(source.get('id'), source.get('group'), source.get('subject_id'))
            datas.append(source)
    except Exception as e:
        # Deliberately best effort: report the failure and return what we have.
        print('query error', e)
    return datas
def query_ju(index_name, client=None):
    """Return the ``_source`` of every hit after collapsing on ``subject_id``.

    Sends a search (``from`` 0, ``size`` 10000) sorted by ``event_id``
    descending and collapsed on ``subject_id``. The request also asks for an
    inner-hits section named ``quality`` (1 document, sorted by ``event_id``
    ascending); those inner hits are requested but not read here.

    Each hit's ``date`` is printed and its ``_source`` is collected. The
    function previously had its only ``append`` commented out and therefore
    always returned an empty list; it now returns the hits like the sibling
    query functions do.

    Args:
        index_name: Name of the index to search.
        client: Object exposing ``search(index=..., doc_type=..., body=...)``.
            When omitted, the module-level ``es`` client is used.

    Returns:
        list: The ``_source`` dicts of the returned hits. Best effort: if the
        search (or reading the response) raises, the error is printed and
        whatever was collected so far - possibly nothing - is returned.
    """
    body = {
        "from": 0,
        "size": 10000,
        "sort": [
            {
                "event_id": {
                    "order": "desc"
                }
            }
        ],
        "collapse": {
            "field": "subject_id",
            "inner_hits": {
                "name": "quality",
                "size": 1,
                "sort": [
                    {
                        "event_id": {
                            "order": "asc"
                        }
                    }
                ]
            }
        }
    }
    datas = []
    try:
        if client is None:
            # Fall back to the client the script entry point binds globally.
            client = es
        response = client.search(index=index_name, doc_type='koala-index', body=body)
        for hit in response['hits']['hits']:
            source = hit['_source']
            # .get() keeps a document without a date from aborting the loop.
            print(source.get('date'))
            datas.append(source)
    except Exception as e:
        # Deliberately best effort: report the failure and return what we have.
        print('query_ju error', e)
    return datas
if __name__ == '__main__':
    # Script entry point. ``es`` is bound at module level on purpose: the
    # query functions in this file read it as a global when they search.
    index_name = 'event_tk'
    es = Elasticsearch(['127.0.0.1:9200'])
    # Run the subject_id-collapse query; it prints each hit it receives.
    query_2coll(index_name)