ElasticSearch入门：增删改查（python）

最新推荐文章于 2022-08-25 10:25:02 发布

M_Z_G_Y

最新推荐文章于 2022-08-25 10:25:02 发布

阅读量802

点赞数 2

分类专栏： ElasticSearch

本文链接：https://blog.csdn.net/M_Z_G_Y/article/details/82116041

版权

ElasticSearch 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

描述：ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎，基于RESTful web接口。下面介绍了利用Python API接口进行数据查询，方便其他系统的调用。（具体学习可查看https://blog.csdn.net/column/details/18392.html）

安装API

pip install elasticsearch

建立es连接

from elasticsearch import Elasticsearch

# Connect to a single Elasticsearch node identified by host and port.
es = Elasticsearch([dict(host='10.10.13.12', port=9200)])

数据检索功能

# Lucene query-string search against one index, starting at offset 124119.
es.search(
    index='logstash-2015.08.20',
    q='http_status_code:5* AND server_name:"web1"',
    from_='124119',
)

常用参数

index - 索引名
q - 查询条件，使用 Lucene 查询语法
from_ - 查询起始位置，默认为 0
doc_type - 文档类型
size - 返回的查询条数，默认为 10
field - 指定返回的字段，多个字段用逗号分隔
sort - 排序方式，格式为“字段:asc/desc”
body - 使用Query DSL
scroll - 滚动查询

增删改查

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/11/6 16:34
# @Author  : HJH
# @File    : temp_es.py
# @Software: PyCharm
"""
# code is far away from bugs with the god animal protecting
    I love animals. They taste delicious.
              ┏┓      ┏┓
            ┏┛┻━━━┛┻┓
            ┃      ☃      ┃
            ┃  ┳┛  ┗┳  ┃
            ┃      ┻      ┃
            ┗━┓      ┏━┛
                ┃      ┗━━━┓
                ┃  神兽保佑    ┣┓
                ┃　永无BUG！   ┏┛
                ┗┓┓┏━┳┓┏┛
                  ┃┫┫  ┃┫┫
                  ┗┻┛  ┗┻┛
"""

from elasticsearch import Elasticsearch
from elasticsearch import helpers


class ESUtil(object):
    '''
    Convenience wrapper around an ``Elasticsearch`` client offering basic
    index management and document create / read / update / delete helpers.

    Documents are written, fetched and deleted with ``doc_type`` equal to the
    index name, which is also the mapping type declared by ``create_index``.
    Every direct client request is issued with an explicit ``request_timeout``.
    '''

    def __init__(self, localhost):
        '''
        Build the underlying client.

        :param localhost: host specification handed unchanged to
            ``Elasticsearch(...)`` (e.g. a list of ``"host:port"`` strings).
        '''
        self.localhost = localhost
        self.es = Elasticsearch(localhost)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _term_query(query_field, query_text):
        '''Build a request body holding a single exact-match ``term`` query.'''
        return {'query': {'term': {query_field: query_text}}}

    @staticmethod
    def _first_hit(res):
        '''Return ``(_source, _id)`` of the first hit in ``res`` or ``(None, None)``.'''
        hits = res["hits"]["hits"]
        if not hits:
            return None, None
        return hits[0]["_source"], hits[0]['_id']

    def _scan_sources(self, index_value, query, scroll, **scan_kwargs):
        '''Run ``helpers.scan`` and collect the ``_source`` of every hit into a list.'''
        hits = helpers.scan(
            client=self.es,
            query=query,
            scroll=scroll,
            index=index_value,
            request_timeout=100,
            **scan_kwargs
        )
        return [hit['_source'] for hit in hits]

    # ------------------------------------------------------------------
    # index management
    # ------------------------------------------------------------------
    def create_index(self, index_name):
        '''
        Create the index ``index_name`` unless it already exists.

        The mapping declares one type, named after the index, with the
        keyword fields ``id``, ``name`` and ``update_time``.

        :return: the client's create response, or ``None`` when the index
            already existed and nothing was done.
        '''
        keyword_field = {"type": "keyword", "index": True}
        _index_mappings = {
            "mappings": {
                index_name: {
                    "properties": {
                        # The field is "id"; a leading space here would map a
                        # different, never-populated field name.
                        "id": dict(keyword_field),
                        "name": dict(keyword_field),
                        "update_time": dict(keyword_field),
                    }
                }
            }
        }
        if self.es.indices.exists(index=index_name):
            return None
        return self.es.indices.create(index=index_name, body=_index_mappings)

    def delete_index(self, index_value):
        '''Delete the whole index ``index_value`` and return the client response.'''
        return self.es.indices.delete(index=index_value, request_timeout=100)

    def add_alias(self, index_name, alias):
        '''
        Attach the alias ``alias`` to the index ``index_name``.

        Arguments are passed by keyword so the call does not depend on the
        positional order accepted by the installed client version.
        '''
        return self.es.indices.put_alias(index=index_name, name=alias, request_timeout=100)

    # ------------------------------------------------------------------
    # single-document operations
    # ------------------------------------------------------------------
    def insert_data(self, index_value, content_map):
        '''
        Index a single document.

        :param content_map: the document body.
        :return: the client's index response.
        '''
        return self.es.index(index=index_value, doc_type=index_value, body=content_map, request_timeout=100)

    def delete_data(self, index_value, query_field, query_text):
        '''Delete every document whose ``query_field`` matches ``query_text`` (term query).'''
        return self.es.delete_by_query(index=index_value, body=self._term_query(query_field, query_text),
                                       request_timeout=100)

    def delete_data_by_id(self, index_value, myid):
        '''Delete the document with id ``myid``.'''
        return self.es.delete(index=index_value, doc_type=index_value, id=myid, request_timeout=100)

    def update_data(self, index_value, my_id, content_map):
        '''
        Update the document with id ``my_id``.

        ``content_map`` is sent as-is as the update request body.
        NOTE(review): the update API presumably expects a wrapper such as
        ``{"doc": {...}}`` here - confirm against callers.
        '''
        return self.es.update(index=index_value, doc_type=index_value, id=my_id, body=content_map, request_timeout=100)

    def get_data(self, index_value, id_value):
        '''
        Fetch a document by its id.

        ``doc_type`` is passed explicitly, consistent with how documents are
        inserted, updated and deleted by this class.
        '''
        return self.es.get(index=index_value, doc_type=index_value, id=id_value, request_timeout=100)

    # ------------------------------------------------------------------
    # searching
    # ------------------------------------------------------------------
    def search_data_1(self, index_value, query_field, query_text):
        '''
        Term-search and return the first hit only, with all of its fields.

        :return: ``(source, doc_id)`` of the first hit, or ``(None, None)``
            when nothing matched.
        '''
        res = self.es.search(index=index_value, body=self._term_query(query_field, query_text),
                             request_timeout=100)
        return self._first_hit(res)

    def search_data_1_condition(self, index_value, query_field, query_text, source_fields=("name",)):
        '''
        Term-search and return the first hit only, restricted to some fields.

        :param source_fields: names of the ``_source`` fields to return;
            defaults to just ``name``.
        :return: ``(source, doc_id)`` of the first hit, or ``(None, None)``
            when nothing matched.
        '''
        body = self._term_query(query_field, query_text)
        body["_source"] = {"includes": list(source_fields)}
        res = self.es.search(index=index_value, body=body, request_timeout=100)
        return self._first_hit(res)

    def search_data_user_defined(self, index_value, query_field, query_text, search_offset, search_size):
        '''
        Term-search and return one page of results.

        :param search_offset: offset of the first hit to return (``from_``).
        :param search_size: number of hits to return (the original author
            notes an upper bound of 10,000).
        :return: list with the ``_source`` of each returned hit.
        '''
        es_result = self.es.search(
            index=index_value,
            body={"query": {'term': {query_field: query_text}}},
            from_=search_offset,
            size=search_size,
            request_timeout=100
        )
        return [item['_source'] for item in es_result['hits']['hits']]

    def search_data_all(self, index_value, query_field, query_text, scroll='5m'):
        '''
        Term-search and return every matching document via a scroll scan.

        :param scroll: scroll keep-alive passed to ``helpers.scan``.
        :return: list with the ``_source`` of each hit.
        '''
        return self._scan_sources(index_value, {"query": {'term': {query_field: query_text}}}, scroll)

    def search_data_mulmatch(self, index_value, query_dict, scroll='5m',
                             source_fields=("imageUuid", "hash", "f7")):
        '''
        Search on several fields at once: every ``field: value`` pair of
        ``query_dict`` becomes a ``term`` clause and all clauses must match.

        Example of the generated query for two fields::

            {"query": {"bool": {"must": [{"term": {"update_time": "1539156536.4105523"}},
                                         {"term": {"image_name": "kjhskj"}}]}}}

        :param source_fields: names of the ``_source`` fields to return.
        :return: list with the (filtered) ``_source`` of each hit.
        '''
        must_clauses = [{"term": {key: value}} for key, value in query_dict.items()]
        query = {"query": {"bool": {"must": must_clauses}},
                 "_source": {"includes": list(source_fields)}}
        return self._scan_sources(index_value, query, scroll, size=10000)

    def search_data_text(self, index_value, query_field, query_text, scroll='5m',
                         source_fields=("imageUuid",)):
        '''
        Fuzzy search on one field (``fuzziness`` ``AUTO``, no required prefix,
        up to 500 expansions).

        :param source_fields: names of the ``_source`` fields to return.
        :return: list with the (filtered) ``_source`` of each hit.
        '''
        fuzzy_options = {"value": query_text, "fuzziness": 'AUTO', "prefix_length": 0, "max_expansions": 500}
        query = {"query": {"fuzzy": {query_field: fuzzy_options}},
                 "_source": {"includes": list(source_fields)}}
        return self._scan_sources(index_value, query, scroll, size=10000)

    def search_lte_time(self, index_value, query_time, scroll='5m'):
        '''
        Return every document whose ``update_time`` is less than or equal to
        ``query_time`` (range query with ``lte``; ``gt`` would mean "greater
        than").

        :return: list with the ``_source`` of each hit.
        '''
        query = {"query": {'range': {'update_time': {'lte': query_time}}}}
        return self._scan_sources(index_value, query, scroll)

    def get_max(self, index, query):
        '''
        Find the document with the largest value of the field ``query``.

        All documents are matched and sorted descending on that field.

        :return: the complete first hit (including metadata such as ``_id``),
            or ``None`` when the index returned no hits.
        '''
        body = {
            "query": {
                "match_all": {}
            },
            "sort": {
                query: {
                    "order": "desc"
                }
            }
        }
        res = self.es.search(index=index, body=body, request_timeout=100)
        search_list = res["hits"]["hits"]
        return search_list[0] if search_list else None

    # ------------------------------------------------------------------
    # copying
    # ------------------------------------------------------------------
    def copy_idex(self, src, des):
        '''Copy every document of index ``src`` into index ``des`` using ``helpers.reindex``.'''
        body = {"query": {"match_all": {}}}
        return helpers.reindex(client=self.es, source_index=src, target_index=des, target_client=self.es,
                               query=body, chunk_size=1000)

    def temp_copy_idex_condition(self, src, des, query_time):
        '''
        Copy the documents of ``src`` whose ``update_time`` is less than or
        equal to ``query_time`` into ``des`` through the reindex API, using a
        longer request timeout than the other calls.
        '''
        body = {"source": {"index": src, "query": {'range': {'update_time': {'lte': query_time}}}},
                "dest": {"index": des}}
        return self.es.reindex(body=body, request_timeout=1000)


if __name__ == "__main__":
    # Example entry point: build a helper bound to a single node given as "host:port".
    es = ESUtil(['10.9.27.153:9200'])

M_Z_G_Y

关注

2
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
ElasticSearch入门：增删改查（python）

描述：ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎，基于RESTful web接口。下面介绍了利用Python API接口进行数据查询，方便其他系统的调用。（具体学习可查看https://blog.csdn.net/column/details/18392.html）安装API：pip install elasticsearch...
复制链接

扫一扫

专栏目录