1、封装工具类
工具类使用单例模式,自动初始化Elasticsearch实例,并提供一系列方法,如:search、count、get、index、bulk、scroll等
import threading
from elasticsearch import Elasticsearch
class ESClient(object):
    """
    Singleton helper around an ``Elasticsearch`` client.

    Every ``ESClient()`` call returns the same object, and the underlying
    Elasticsearch connection is created exactly once.  The class exposes thin
    wrappers for the common document APIs (search, count, get, index, bulk,
    exists) plus a ``scroll`` helper that collects every matching hit.
    """
    # Guards both creation of the shared instance and its one-time setup.
    _instance_lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        # Check / lock / re-check so concurrent first calls create one object.
        if not hasattr(ESClient, "_instance"):
            with ESClient._instance_lock:
                if not hasattr(ESClient, "_instance"):
                    ESClient._instance = object.__new__(cls)
        return ESClient._instance

    def __init__(self):
        """
        Create the Elasticsearch connection the first time only.

        The client uses the default Urllib3HttpConnection pool
        (default pool size ``maxsize=10``).
        """
        # Python runs __init__ on every ESClient() call even though __new__
        # hands back the same object; without this guard each call would
        # build (and sniff) a brand-new connection.
        if getattr(self, "_initialized", False):
            return
        with ESClient._instance_lock:
            if getattr(self, "_initialized", False):
                return
            self.__es = Elasticsearch(
                ['192.168.0.9:9200'],
                # sniff before doing anything
                sniff_on_start=True,
                # refresh nodes after a node fails to respond
                sniff_on_connection_fail=True,
                # and also every 60 seconds; the periodic-refresh option is
                # ``sniffer_timeout`` (``sniff_timeout`` is only the timeout
                # of a single sniff request)
                sniffer_timeout=60,
            )
            self._initialized = True

    @property
    def client(self):
        """
        Return the underlying Elasticsearch client.
        """
        return self.__es

    def search(self, index, body=None):
        """
        Run a query against an index.
        :param index: index name
        :param body: Query DSL as a JSON-compatible dict
        :return: the search response
        """
        return self.__es.search(index=index, body=body)

    def count(self, index, body=None):
        """
        Count the documents matching a query.
        :param index: index name
        :param body: Query DSL as a JSON-compatible dict
        :return: the count response
        """
        return self.__es.count(index=index, body=body)

    def get(self, index, data_id):
        """
        Fetch a document (with metadata) by ID.
        :param index: index name
        :param data_id: document ID
        :return: the get response
        """
        return self.__es.get(index=index, id=data_id)

    def get_source(self, index, data_id):
        """
        Fetch only the source of a document by ID.
        :param index: index name
        :param data_id: document ID
        :return: the document source
        """
        return self.__es.get_source(index=index, id=data_id)

    def index(self, index, body):
        """
        Write one document into an index.
        :param index: index name
        :param body: document content
        :return: the index response
        """
        return self.__es.index(index=index, body=body)

    def bulk(self, body=None):
        """
        Save documents in bulk.
        :param body: bulk payload (action lines and documents)
        :return: the bulk response
        """
        return self.__es.bulk(body=body)

    def exists(self, index, data_id):
        """
        Check whether a document exists.
        :param index: index name
        :param data_id: document ID
        :return: the result of the exists call
        """
        return self.__es.exists(index=index, id=data_id)

    def scroll(self, index, body, size=100, scroll='5m'):
        """
        Collect every hit matching a query using the scroll API.

        A plain search only returns one page, so this keeps requesting the
        next batch until an empty batch comes back.
        :param index: index name
        :param body: query
        :param size: maximum number of hits per batch
        :param scroll: how long the search context is kept alive
        :return: list of all hits
        """
        page = self.__es.search(index=index, body=body, scroll=scroll, size=size)
        scroll_id = page.get('_scroll_id')
        collected = []
        try:
            hits = page['hits']['hits']
            # Stop on the first empty batch instead of trusting hits.total,
            # which can be a lower bound (relation "gte") or a plain int.
            while hits:
                collected.extend(hits)
                page = self.__es.scroll(scroll_id=scroll_id, scroll=scroll)
                # Always continue with the most recent scroll id.
                scroll_id = page.get('_scroll_id', scroll_id)
                hits = page['hits']['hits']
        finally:
            # Release the server-side search context instead of letting it
            # linger until the keep-alive expires.
            if scroll_id is not None:
                self.__es.clear_scroll(scroll_id=scroll_id)
        return collected
2、使用
# Obtain the shared ESClient instance used by the examples below.
es = ESClient()
(1)创建索引
# Create the index with an explicit mapping for its three fields.
index_definition = {
    "mappings": {
        "properties": {
            "name": {"type": "keyword"},
            "content": {"type": "keyword"},
            "datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
        }
    }
}
es.client.indices.create(index='test-index', body=index_definition)
通过kibana查看索引详情
(2)向索引创建文档
# Index a single document.
single_doc = {
    "name": "测试",
    "content": "测试内容",
    "datetime": "2011-11-11 11:11:11",
}
es.index('test-index', body=single_doc)
# Index several documents in one bulk request: each document is preceded
# by an action line naming the target index.
batch_docs = [
    {
        "name": "批量测试1",
        "content": "批量测试内容1",
        "datetime": "2011-11-11 11:11:11",
    },
    {
        "name": "批量测试2",
        "content": "批量测试内容2",
        "datetime": "2011-11-11 11:11:11",
    },
    {
        "name": "批量测试3",
        "content": "批量测试内容3",
        "datetime": "2011-11-11 11:11:11",
    },
]
bulk_data = []
for doc in batch_docs:
    bulk_data.append({"index": {'_index': "test-index"}})
    bulk_data.append(doc)
es.bulk(body=bulk_data)
通过kibana查看结果
(3)检索文档
# Fetch documents with a match_all query (a plain search returns one page).
match_all_query = {"query": {"match_all": {}}}
data1 = es.search('test-index', body=match_all_query)
print(data1)
结果
{'took': 0, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 4, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'test-index', '_type': '_doc', '_id': 'JgecSXgB86SCYdV04mm-', '_score': 1.0, '_source': {'name': '测试', 'content': '测试内容', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'JwezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试1', 'content': '批量测试内容1', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KAezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试2', 'content': '批量测试内容2', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KQezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试3', 'content': '批量测试内容3', 'datetime': '2011-11-11 11:11:11'}}]}}
(4)通过scroll获取所有数据
# The index holds 4 documents; fetching 3 per batch means two batches are
# needed to retrieve them all.
scroll_query = {"query": {"match_all": {}}}
data1 = es.scroll(index='test-index', size=3, body=scroll_query)
print(data1)
结果
[{'_index': 'test-index', '_type': '_doc', '_id': 'JgecSXgB86SCYdV04mm-', '_score': 1.0, '_source': {'name': '测试', 'content': '测试内容', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'JwezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试1', 'content': '批量测试内容1', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KAezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试2', 'content': '批量测试内容2', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KQezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试3', 'content': '批量测试内容3', 'datetime': '2011-11-11 11:11:11'}}]