python操作elasticsearch

最新推荐文章于 2024-07-27 18:55:40 发布

yuer5531

最新推荐文章于 2024-07-27 18:55:40 发布

阅读量529

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/yuer5531/article/details/115009193

版权

python 专栏收录该内容

8 篇文章 1 订阅

订阅专栏

1、封装工具类

工具类使用单例模式，自动初始化Elasticsearch实例，并提供一系列方法，如：search、count、get、index、bulk、scroll等

import threading

from elasticsearch import Elasticsearch


class ESClient(object):
    """Process-wide singleton wrapper around an ``Elasticsearch`` client.

    Every ``ESClient()`` call returns the same object, and the underlying
    connection is created only once.  The wrapper exposes thin helpers for
    the most common document APIs (search, count, get, index, bulk, exists)
    plus :meth:`scroll`, which drains a complete result set.
    """

    # Guards both singleton creation and the one-time connection setup.
    _instance_lock = threading.Lock()
    # Becomes True once __init__ has built the underlying client.
    _initialized = False

    def __new__(cls, *args, **kwargs):
        # Check, lock, check again: the unlocked test keeps the common path
        # cheap, the locked re-test stops two threads from both creating
        # the instance.
        if not hasattr(ESClient, "_instance"):
            with ESClient._instance_lock:
                if not hasattr(ESClient, "_instance"):
                    ESClient._instance = object.__new__(cls)
        return ESClient._instance

    def __init__(self, hosts=None):
        """
        Create the Elasticsearch connection the first time the singleton is built.

        Python calls ``__init__`` on every ``ESClient()`` expression, even
        though ``__new__`` hands back the existing instance, so the setup is
        guarded to avoid rebuilding the connection pool (and re-sniffing the
        cluster) on each call.

        The connection class used by default is ``Urllib3HttpConnection``,
        whose signature lists a pool size of ``maxsize=10``.

        :param hosts: optional list of ``host:port`` strings; defaults to
            ``['192.168.0.9:9200']``.  Only honoured by the first
            construction; later calls reuse the existing client.
        """
        if ESClient._initialized:
            return
        with ESClient._instance_lock:
            if ESClient._initialized:
                return
            self.__es = Elasticsearch(
                hosts or ['192.168.0.9:9200'],
                # sniff the cluster before doing anything
                sniff_on_start=True,
                # refresh the node list after a node fails to respond
                sniff_on_connection_fail=True,
                # and also re-sniff every 60 seconds; the periodic interval
                # is `sniffer_timeout` (`sniff_timeout` is the timeout of a
                # single sniff request)
                sniffer_timeout=60,
            )
            ESClient._initialized = True

    @property
    def client(self):
        """
        Return the underlying ``Elasticsearch`` client for APIs not wrapped here.
        """
        return self.__es

    def search(self, index, body=None):
        """
        Run a query against an index.

        :param index: index name
        :param body: Query DSL as a dict
        :return: the raw search response
        """
        return self.__es.search(index=index, body=body)

    def count(self, index, body=None):
        """
        Count the documents matching a query.

        :param index: index name
        :param body: Query DSL as a dict
        :return: the raw count response
        """
        return self.__es.count(index=index, body=body)

    def get(self, index, data_id):
        """
        Fetch a document together with its metadata.

        :param index: index name
        :param data_id: document ID
        :return: the raw get response
        """
        return self.__es.get(index=index, id=data_id)

    def get_source(self, index, data_id):
        """
        Fetch only the stored content of a document.

        :param index: index name
        :param data_id: document ID
        :return: the response of the client's ``get_source`` call
        """
        return self.__es.get_source(index=index, id=data_id)

    def index(self, index, body):
        """
        Write one document into an index.

        :param index: index name
        :param body: document content
        :return: the raw index response
        """
        return self.__es.index(index=index, body=body)

    def bulk(self, body=None):
        """
        Submit a batch of bulk actions.

        :param body: bulk payload (action lines and document lines)
        :return: the raw bulk response
        """
        return self.__es.bulk(body=body)

    def exists(self, index, data_id):
        """
        Check whether a document exists.

        :param index: index name
        :param data_id: document ID
        :return: the response of the client's ``exists`` call
        """
        return self.__es.exists(index=index, id=data_id)

    def scroll(self, index, body, size=100, scroll='5m'):
        """
        Retrieve every hit matching a query by paging with the scroll API.

        A plain ``search`` returns a single page only.  This method keeps
        requesting pages until one comes back empty, so it does not depend
        on the reported ``hits.total`` value.

        :param index: index name
        :param body: query conditions
        :param size: maximum number of hits per batch
        :param scroll: how long the search context is kept alive between batches
        :return: list of all hit dicts
        """
        page = self.__es.search(index=index, body=body, scroll=scroll, size=size)
        scroll_id = page.get('_scroll_id')
        hits = []
        try:
            while page['hits']['hits']:
                hits.extend(page['hits']['hits'])
                page = self.__es.scroll(scroll_id=scroll_id, scroll=scroll)
                # Always continue with the most recently returned id.
                scroll_id = page.get('_scroll_id', scroll_id)
        finally:
            # Release the server-side search context instead of letting it
            # linger until the keep-alive expires.
            if scroll_id:
                self.__es.clear_scroll(scroll_id=scroll_id)
        return hits

2、使用

# Obtain the client wrapper; ESClient.__new__ hands back one shared instance.
es = ESClient()

（1）创建索引

# Create the index with an explicit mapping: two keyword fields and a date
# field that uses a custom "yyyy-MM-dd HH:mm:ss" format.
es.client.indices.create(index='test-index', body={
    "mappings": {
        "properties": {
            "name": {
                "type": "keyword"
            },
            "content": {
                "type": "keyword"
            },
            "datetime": {
                "type": "date",
                "format": "yyyy-MM-dd HH:mm:ss"
            }
        }
    }
})

通过kibana查看索引详情
在这里插入图片描述

（2）向索引创建文档

# Index a single document.
es.index('test-index', body={
    "name": "测试",
    "content": "测试内容",
    "datetime": "2011-11-11 11:11:11",
})

# Index several documents in one request.  The bulk payload alternates an
# action line ({"index": ...}) with the document it applies to.
bulk_data = [
    {"index": {'_index': "test-index"}},
    {
        "name": "批量测试1",
        "content": "批量测试内容1",
        "datetime": "2011-11-11 11:11:11",
    },
    {"index": {'_index': "test-index"}},
    {
        "name": "批量测试2",
        "content": "批量测试内容2",
        "datetime": "2011-11-11 11:11:11",
    },
    {"index": {'_index': "test-index"}},
    {
        "name": "批量测试3",
        "content": "批量测试内容3",
        "datetime": "2011-11-11 11:11:11",
    },
]
es.bulk(body=bulk_data)

通过kibana查看结果
在这里插入图片描述

（3）检索文档

# Fetch the first page of documents in the index with a match_all query.
data1 = es.search('test-index', body={
    "query": {
        "match_all": {}
    }
})
print(data1)

结果

{'took': 0, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 4, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'test-index', '_type': '_doc', '_id': 'JgecSXgB86SCYdV04mm-', '_score': 1.0, '_source': {'name': '测试', 'content': '测试内容', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'JwezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试1', 'content': '批量测试内容1', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KAezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试2', 'content': '批量测试内容2', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KQezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试3', 'content': '批量测试内容3', 'datetime': '2011-11-11 11:11:11'}}]}}

（4）通过scroll获取所有数据

# The index holds 4 documents and each batch returns at most 3, so it takes
# 2 batches to retrieve them all.
data1 = es.scroll(index='test-index', size=3, body={
    "query": {
        "match_all": {}
    }
})
print(data1)

结果

[{'_index': 'test-index', '_type': '_doc', '_id': 'JgecSXgB86SCYdV04mm-', '_score': 1.0, '_source': {'name': '测试', 'content': '测试内容', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'JwezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试1', 'content': '批量测试内容1', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KAezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试2', 'content': '批量测试内容2', 'datetime': '2011-11-11 11:11:11'}}, {'_index': 'test-index', '_type': '_doc', '_id': 'KQezSXgB86SCYdV0-Gn1', '_score': 1.0, '_source': {'name': '批量测试3', 'content': '批量测试内容3', 'datetime': '2011-11-11 11:11:11'}}]