# -*- coding: utf-8 -*-
# @Time : 2024/5/14 10:40
# @Author : hjcui
# @Site :
# @File : 关键词对应的话单记录.py
# @Software: PyCharm
from elasticsearch import Elasticsearch
import os
# Keyword whose matching call records are looked up.
kwd = "你好"


def _nested_term_clause(path):
    """Build a nested exact-match clause on ``<path>.keyword.keyword``.

    The term value is the module-level ``kwd``.
    """
    field = f"{path}.keyword.keyword"
    term = {field: {"value": kwd}}
    return {"nested": {"path": path, "query": {"term": term}}}


# Query definition: a document matches when the keyword appears in either
# the "B" or the "A" nested keyword results (bool/should).
query = {
    "query": {
        "bool": {
            "should": [
                _nested_term_clause("kwdresult.B.kwdresults"),
                _nested_term_clause("kwdresult.A.kwdresults"),
            ],
        },
    },
}
# Function that runs the scroll query and writes the matching call IDs to a file
def scroll_query(es, index, query, scroll_size=1000, scroll_time='2m',
                 keyword=None, output_path=None):
    """Scroll through every hit of ``query`` and write each hit's call ID to a file.

    The first line of the output file is a header with the keyword and the
    total hit count; every following line is the ``callid`` field of one
    hit's ``_source``.

    Args:
        es: Elasticsearch client; must provide ``search``, ``scroll`` and
            ``clear_scroll``.
        index: Name of the index to search.
        query: Query body passed to ``es.search``.
        scroll_size: Number of hits requested per scroll page.
        scroll_time: How long each scroll context is kept alive (e.g. ``'2m'``).
        keyword: Keyword shown in the header and used for the default file
            name. Defaults to the module-level ``kwd``.
        output_path: Path of the file to write. Defaults to
            ``./<keyword>.txt``.

    Raises:
        KeyError: If a hit's ``_source`` has no ``callid`` field.
        OSError: If the output file cannot be opened or written.
    """
    if keyword is None:
        keyword = kwd
    if output_path is None:
        output_path = f'./{keyword}.txt'

    result = es.search(index=index, body=query, size=scroll_size, scroll=scroll_time)
    scroll_id = result['_scroll_id']
    try:
        total_docs = result['hits']['total']
        # Newer Elasticsearch versions report the total as an object
        # ({"value": N, "relation": ...}); older ones as a plain integer.
        if isinstance(total_docs, dict):
            total_docs = total_docs['value']

        # The context manager closes the file even when a later call raises.
        with open(output_path, 'w', encoding='utf-8') as res:
            res.write(f"包含关键词 {keyword} 的话单数是: {total_docs},以下是话单ID\n")
            hits = result['hits']['hits']
            # An empty page marks the end of the scroll.
            while hits:
                res.writelines(f"{hit['_source']['callid']}\n" for hit in hits)
                result = es.scroll(scroll_id=scroll_id, scroll=scroll_time)
                scroll_id = result['_scroll_id']
                hits = result['hits']['hits']
    finally:
        # Release the scroll context on both the success and the error path.
        es.clear_scroll(scroll_id=scroll_id)
    print("The file saved sucessfully.")
if __name__ == '__main__':
    # Script entry point: connect to the cluster, check that it is reachable,
    # then export the call IDs matching the module-level query.
    es_conn = Elasticsearch([
        '192.168.0.147:9200',
        '192.168.0.148:9200',
        '192.168.0.149:9200',
        '192.168.0.150:9200',
        '192.168.0.141:9200',
    ])
    if not es_conn.ping():
        print("Could not connect to Elasticsearch.")
        # Stop with a non-zero exit status instead of running the query
        # against a cluster that did not answer the ping.
        raise SystemExit(1)
    print("connected to Elasticsearch.")

    index = 'cr-all-2024.05.15'
    scroll_query(es_conn, index, query, scroll_size=1000, scroll_time='2m')
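# NOTE(review): the trailing lines below are not valid Python and look like
# paste/extraction residue; kept as comments so the file parses -- confirm
# and remove.
# 09-30
# 4245
# 03-11
# 1811
# 08-22
# 2369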