Elasticsearch 常用rest api与elasticsearch库_elasticsearch rest api-CSDN博客

params = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "aliases": {
        "test_log":{}
    },
    "mappings": {
        "properties": {
            "create_time": {
                "type": "date"
            },
            "status":{"type": "integer"},
            "dev_ip":{"type": "ip"},
            "dev_uuid":{"type": "keyword"},
            "user": {
                "properties": {
                    "name": {
                        "type": "text"
                    },
                    "age": {
                        "type": "integer"
                    },
                    "email": {
                        "type": "keyword"
                    }
                }
            },
            "infos": {
                "type": "nested",
                "dynamic": false,
                "properties": {
                    "v_id": {"type": "keyword"},
                    "v_name": {"type": "keyword"},
                    "v_desc": {"type": "text"}
                }
            },
            "remark": {"type": "text"},
            "show_num": {"type": "long", "doc_values": false, "index": false}
        }
    }
}


"""
字段类型：

text：用于全文本搜索，如文章内容。text字段会被分析器分词，支持模糊搜索和前缀搜索。
keyword：用于不进行分词的搜索，适合存储关键词、ID、标签等。keyword字段不会被分词，适用于精确匹配和聚合。
integer/long/short/byte/double/float/half_float/scaled_float：用于数值类型的数据，不同的类型对应不同的数值范围和精度。
boolean：布尔型字段，值只能是true或false。
binary：用于存储二进制数据，如图像或文件。
date：日期时间字段，可以使用ISO8601格式的字符串或毫秒级的时间戳。
date_nanos：纳秒精度的日期时间字段，存储为纳秒时间戳。
ip：用于存储IPv4或IPv6地址。
object：用于嵌套的JSON对象。可以定义内部字段的映射。
nested：用于存储复杂结构的嵌套文档，允许对嵌套文档进行独立索引和搜索。
geo_point：用于地理坐标，支持基于地理位置的查询。
geo_shape：用于地理形状，如多边形，支持更复杂的地理查询。
completion：用于自动补全功能，存储经过分析的词条列表。
constant_keyword：keyword的特例，用于同一索引内所有文档在该字段上取值都相同的场景；该值只在映射中保存一次，按该字段过滤时效率更高。
token_count：用于存储分词后的词数。
"""


"""
show_num 字段
类型：long
doc_values：false
这意味着 Elasticsearch 不会为这个字段创建 doc values。Doc values 是一种列式存储结构，排序、聚合以及在脚本中访问字段值都依赖它。由于 show_num 字段被设置为 doc_values: false，它不能用于排序和聚合；如果强行使用，请求会直接报错，而不仅仅是性能下降。
index：false
这表示 show_num 字段不会被索引。索引是 Elasticsearch 中用于快速检索数据的数据结构。由于这个字段没有被索引，它不能作为搜索查询中的查询或过滤条件；又因为该字段同时设置了 doc_values: false，它也不能用于排序和聚合。基本上，这个字段在存储后，只能通过 _source 字段（即原始 JSON 文档）进行访问。
用途：show_num 字段可能用于存储一些不需要进行搜索、排序或聚合操作的数值数据。它可能只是作为记录的一部分被存储和检索。


infos 字段
类型：nested
nested 类型允许对象数组中的每个对象被索引为独立的文档，但保留在原始文档中的嵌套结构。这对于需要独立查询嵌套对象中的字段，同时保持它们与父文档的关系的情况非常有用。
dynamic：false
这意味着 infos 下未在映射中声明的新属性不会被动态加入映射：写入时这些属性会被忽略，不会被索引、也无法被搜索，但仍会保留在 _source 中。需要被搜索的属性必须在映射中明确声明；如果希望遇到未声明的属性时直接拒绝写入，应使用 dynamic: strict。
"""

2. 删除索引

请求方式：DELETE

请求URL：http://ip/（your_index_name）

示例URL：http://ip/test_log_20240710-0

3. 插入单条数据

请求方式：POST

请求URL：http://ip/（your_index_name）/_doc

示例URL：http://ip/test_log_20240710-0/_doc

数据参数：

{
  "create_time": 1720601022255,
  "status": 201,
  "dev_ip": "192.168.1.101",
  "dev_uuid": "123e4567e89b12d3a456426614174000",
  "user": {
    "name": "战三",
    "age": 30,
    "email": "zhansan@example.com"
  },
  "infos": [
    {
      "v_id": "123e4567e89b12d3a456426614174000",
      "v_name": "战三",
      "v_desc": "描述啦啦啦啦啦啦"
    }
  ],
  "remark": "描述！！！！！",
  "show_num": 6789
}

4. 更新单条数据

请求方式：PUT

请求URL：http://ip/（your_index_name）/_doc/（本条记录id）

示例URL：http://ip/test_log_20240710-0/_doc/KjjOm5ABJ5wlHmqgfJvm

数据参数：

{
  "create_time": "2023-04-01T12:00:00Z",
  "status": 200,
  "dev_ip": "192.168.1.100",
  "dev_uuid": "123e4567-e89b-12d3-a456-426614174000",
  "user": {
    "name": "阿汉",
    "age": 30,
    "email": "john.doe@example.com"
  },
  "infos": [
    {
      "v_id": "info1",
      "v_name": "Info One",
      "v_desc": "This is the description of info one."
    },
    {
      "v_id": "info2",
      "v_name": "Info Two",
      "v_desc": "This is the description of info two."
    }
  ],
  "remark": "Additional remarks about this log entry.",
  "show_num": 12345
}

5. 删除单条数据

请求方式：DELETE

请求URL：http://ip/（your_index_name）/_doc/（本条记录id）

示例URL：http://ip/test_log_20240710-0/_doc/KjjOm5ABJ5wlHmqgfJvm

数据参数：无

6. 查询数据

请求方式：POST

请求URL：http://ip/（your_index_name）/_search

示例URL：http://ip/test_log_20240710-0/_search

数据参数：

{
    "size": 10,
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "status": 200
                    }
                }
            ]
        }
    }
}

二、python elasticsearch库

1. 工具类

复制出来就能用，elasticsearch使用的6.x版本已调试

#! -*- coding:utf-8 -*
import time

from elasticsearch import Elasticsearch, helpers


class EstUtil:
    """Singleton convenience wrapper around an ``Elasticsearch`` client.

    Every ``EstUtil()`` call returns the same object, and the underlying
    client (``self.es``) is built only on the first call.

    The document-level helpers send the ``_doc`` mapping type, which the
    6.x Python client requires and the 7.x client still accepts.
    """

    _instance = None
    # Mapping type sent with every document-level request so that all helpers
    # agree with each other (index/update/batch_insert already used it).
    _DOC_TYPE = '_doc'

    def __new__(cls, *args, **kwargs):
        # Extra arguments are accepted for signature compatibility but are not
        # forwarded: object.__new__ raises TypeError when it receives them.
        if cls._instance is None:
            cls._instance = super(EstUtil, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every EstUtil() call even though __new__ hands back
        # the shared instance, so guard against rebuilding the client.
        if getattr(self, '_initialized', False):
            return
        # TODO: no hosts are passed, so the client default is used;
        # pass hosts=[...] here to target another cluster.
        self.es = Elasticsearch(timeout=300, max_retries=3)
        self._initialized = True

    def is_exists_index(self, index_name):
        """
        Check whether an index exists.
        :param index_name: index name
        :return: result of ``indices.exists`` for that index
        """
        return self.es.indices.exists(index=index_name)

    def get_all_indices(self, index_=None):
        """
        List indices.
        :param index_: optional index name/pattern; when given, the
            ``cat.indices`` information for it is returned instead
        :return: list of names keyed by ``indices.get_alias()`` when
            ``index_`` is empty, otherwise the ``cat.indices`` JSON result
        """
        if not index_:
            # Names only; materialised so callers get a real list rather
            # than a dict view.
            return list(self.es.indices.get_alias().keys())
        # Per-index information (status etc.) as reported by the cat API.
        return self.es.cat.indices(index=index_, format="json")

    def index(self, index_name, body):
        """
        Insert a single document.
        :param index_name: index name
        :param body: document dict, e.g. {"name": "aaa", "age": 18}
        :return: True when the response result is 'created', else False
        """
        response = self.es.index(index=index_name, body=body, doc_type=self._DOC_TYPE)
        return response['result'] == 'created'

    def batch_insert(self, index_name, body_list):
        """
        Bulk-insert documents.
        :param index_name: index name
        :param body_list: list of document dicts
        :return: whatever ``helpers.bulk`` returns
        """
        actions = [
            {"_index": index_name, "_type": self._DOC_TYPE, "_source": data}
            for data in body_list
        ]
        return helpers.bulk(self.es, actions)

    def update(self, index_name, doc_id, update_body):
        """
        Update a single document.
        :param index_name: index name
        :param doc_id: document id
        :param update_body: update request body, e.g. {"doc": {...}}
        :return: True when the response result is 'updated', else False
        """
        response = self.es.update(index=index_name, id=doc_id, body=update_body,
                                  doc_type=self._DOC_TYPE)
        return response['result'] == 'updated'

    def _update_actions(self, index_name, body_list):
        """Build bulk 'update' actions without modifying the caller's dicts."""
        actions = []
        for data in body_list:
            # Work on a shallow copy: popping "id" from the original would
            # silently strip it from the caller's data.
            fields = dict(data)
            doc_id = fields.pop("id")  # KeyError if the entry has no "id"
            actions.append({
                "_op_type": "update",
                "_index": index_name,
                "_type": self._DOC_TYPE,
                "_id": doc_id,
                "doc": fields,
            })
        return actions

    def batch_update(self, index_name, body_list):
        """
        Bulk-update documents.
        :param index_name: index name
        :param body_list: list of dicts; each must contain "id", the
            remaining keys are sent as the partial document
        :return: whatever ``helpers.bulk`` returns
        :raises KeyError: if an entry has no "id" key
        """
        return helpers.bulk(self.es, self._update_actions(index_name, body_list))

    def delete(self, index_name, doc_id):
        """
        Delete a single document.
        :param index_name: index name
        :param doc_id: document id
        :return: True when the response result is 'deleted', else False
        """
        response = self.es.delete(index=index_name, id=doc_id, doc_type=self._DOC_TYPE)
        return response['result'] == 'deleted'

    def batch_delete(self, index_name, body_list):
        """
        Bulk-delete documents.
        :param index_name: index name
        :param body_list: list of dicts; the "id" value of each entry is
            used as the document id
        :return: whatever ``helpers.bulk`` returns
        """
        actions = [
            {
                "_op_type": "delete",
                "_index": index_name,
                "_type": self._DOC_TYPE,
                "_id": data.get("id"),
            }
            for data in body_list
        ]
        return helpers.bulk(self.es, actions)

    def delete_index(self, index_name):
        """
        Delete a single index.
        :param index_name: index name
        :return: the 'acknowledged' value of the delete response
        """
        return self.es.indices.delete(index=index_name)['acknowledged']

    def create_index(self, index_name, field_type_dict, number_of_shards, number_of_replicas):
        """
        Create an index from a flat {field name: field type} dict.
        :param index_name: index name
        :param field_type_dict: dict mapping field name to field type
        :param number_of_shards: number of primary shards
        :param number_of_replicas: number of replicas
        :return: response of ``indices.create``
        :raises ValueError: if the index already exists
        """
        if self.is_exists_index(index_name):
            raise ValueError('索引已存在：%s' % index_name)

        properties = {
            field: {'type': field_type}
            for field, field_type in field_type_dict.items()
        }
        body = {
            'settings': {
                'number_of_shards': number_of_shards,
                'number_of_replicas': number_of_replicas,
            },
            # Typed mapping layout: the properties live under the doc type.
            'mappings': {self._DOC_TYPE: {'properties': properties}},
        }
        return self.es.indices.create(index=index_name, body=body)

    def create_index_by_body(self, index_name, body):
        """
        Create an index from a fully assembled request body.
        :param index_name: index name
        :param body: complete create-index dict, passed through unchanged
        :return: response of ``indices.create``
        :raises ValueError: if the index already exists

        Example body::

            {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "aliases": {"test_log": {}},
                "mappings": {
                    "properties": {
                        "create_time": {"type": "date"},
                        "status": {"type": "integer"},
                        "dev_ip": {"type": "ip"},
                        "dev_uuid": {"type": "keyword"},
                        "user": {
                            "properties": {
                                "name": {"type": "text"},
                                "age": {"type": "integer"},
                                "email": {"type": "keyword"}
                            }
                        },
                        "infos": {
                            "type": "nested",
                            "dynamic": False,
                            "properties": {
                                "v_id": {"type": "keyword"},
                                "v_name": {"type": "keyword"},
                                "v_desc": {"type": "text"}
                            }
                        },
                        "remark": {"type": "text"},
                        "show_num": {"type": "long", "doc_values": False, "index": False}
                    }
                }
            }
        """
        if self.is_exists_index(index_name):
            raise ValueError('索引已存在：%s' % index_name)

        return self.es.indices.create(index=index_name, body=body)

    def search(self, index_name, request_body):
        """
        Run a search.
        :param index_name: index name
        :param request_body: query DSL dict
        :return: raw search response
        """
        return self.es.search(index=index_name, body=request_body, timeout="5m")

    def search_page(self, index_name, request_body, page, size):
        """
        Paged search using from/size (deep paging beyond 10000 hits with
        from/size may cause performance problems).
        :param index_name: index name
        :param request_body: query DSL dict
        :param page: 1-based page number
        :param size: hits per page
        :return: raw search response
        :raises ValueError: if page < 1 or size < 0
        """
        if page < 1:
            # page 0 or below would produce a negative offset.
            raise ValueError('page must be >= 1, got %r' % (page,))
        if size < 0:
            raise ValueError('size must be >= 0, got %r' % (size,))
        offset = (page - 1) * size
        return self.es.search(index=index_name, body=request_body, from_=offset, size=size)

    def search_after(self, index_name, search_after_body):
        """
        Paged search driven by the sort values of the previous page's last hit.

        ``search_after_body`` must contain ``size`` and ``sort``; on follow-up
        calls it also carries ``search_after`` (omit it on the first call).
        The sort must be unique per document, otherwise pages can repeat or
        skip hits; combine several fields or add a unique tiebreaker, e.g.
        {"size": 1, "sort": [{"create_time": {"order": "desc"}}, {"_id": {"order": "asc"}}]}

        :param index_name: index name
        :param search_after_body: query DSL dict
        :return: tuple ({'search_after': <sort values of the last hit, or []
            when the page is empty>}, raw search response)
        """
        response = self.es.search(index=index_name, body=search_after_body)

        hits = response.get('hits').get('hits')
        next_cursor = hits[-1].get('sort') if hits else []
        return {'search_after': next_cursor}, response

    def search_scroll(self, index_name, scroll, body):
        """
        Start a scroll search (first page).
        :param index_name: index name
        :param scroll: how long to keep the scroll alive, e.g. "1m"
        :param body: query DSL dict
        :return: tuple (scroll id, raw search response)
        """
        response = self.es.search(index=index_name, scroll=scroll, body=body)
        return response['_scroll_id'], response

    def search_next(self, scroll_id, scroll):
        """
        Fetch the next page of a scroll started with ``search_scroll``.
        :param scroll_id: scroll id returned by the previous call
        :param scroll: how long to keep the scroll alive, e.g. "1m"
        :return: tuple (scroll id, raw scroll response)
        """
        response = self.es.scroll(scroll_id=scroll_id, scroll=scroll)
        return response['_scroll_id'], response


if __name__ == '__main__':
    # Demo walk-through of the EstUtil helpers against a live cluster.
    es = EstUtil()

    create_index_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "aliases": {
            "test_log": {}
        },
        "mappings": {
            "properties": {
                "create_time": {
                    "type": "date"
                },
                "status": {"type": "integer"},
                "dev_ip": {"type": "ip"},
                "dev_uuid": {"type": "keyword"},
                "user": {
                    "properties": {
                        "name": {
                            "type": "text"
                        },
                        "age": {
                            "type": "integer"
                        },
                        "email": {
                            "type": "keyword"
                        }
                    }
                },
                "infos": {
                    "type": "nested",
                    "dynamic": False,
                    "properties": {
                        "v_id": {"type": "keyword"},
                        "v_name": {"type": "keyword"},
                        "v_desc": {"type": "text"}
                    }
                },
                "remark": {"type": "text"},
                "show_num": {"type": "long", "doc_values": False, "index": False}
            }
        }
    }

    index_name = "test_data_index"
    # Create the index (raises ValueError if it already exists).
    create_response = es.create_index_by_body(index_name, create_index_body)
    print(create_response)
    # Prints e.g. {u'index': u'test_data_index', u'acknowledged': True, u'shards_acknowledged': True}

    # List all indices.
    indices = es.get_all_indices()
    print(indices)

    # Insert a single document.
    insert_body = {
        "create_time": 1720601022255,
        "status": 201,
        "dev_ip": "192.168.1.101",
        "dev_uuid": "123e4567e89b12d3a456426614174000",
        "user": {
            "name": "战三",
            "age": 30,
            "email": "zhansan@example.com"
        },
        "infos": [
            {
                "v_id": "123e4567e89b12d3a456426614174000",
                "v_name": "战三",
                "v_desc": "描述啦啦啦啦啦啦"
            }
        ],
        "remark": "描述！！！！！",
        "show_num": 6789
    }
    insert_resp = es.index(index_name, insert_body)
    print(insert_resp)

    # Update a document / add a field dynamically (send_time is not in the
    # mapping created above, but can be added through an update).
    update_body = {
        "doc": {
            "send_time": int(time.time())
        }
    }
    # NOTE(review): this id is a sample value; it must be replaced with the id
    # of a document that exists in the index, otherwise the update fails.
    update_resp = es.update(index_name, "OWps6pEBay2ae5Uuwei-", update_body)
    print(update_resp)

    # Basic search.
    resp2 = es.search(index_name, {"size": 10, "from": 1})
    print(resp2)

    # Paging, variant 1: from/size.
    search_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "send_time": 1726226611
                        }
                    }
                ]
            }
        }
    }
    resp1 = es.search_page(index_name, search_body, 1, 1)
    print(resp1)

    # Paging, variant 2: scroll.
    scroll = "1m"
    scroll_id, response = es.search_scroll(index_name, scroll, {"size": 10})
    print(scroll_id, response)
    while response["hits"]["hits"]:
        scroll_id, response = es.search_next(scroll_id, scroll)
        print(scroll_id, response)
    # Release the scroll context now instead of waiting for it to expire.
    es.es.clear_scroll(scroll_id=scroll_id)

    # Paging, variant 3-1: search_after, feeding the returned cursor back in.
    after_search = {"size": 1, "sort": [{"create_time": {"order": "desc"}}, {"_id": {"order": "asc"}}]}
    search_after_body, resp3 = es.search_after(index_name, after_search)
    print(search_after_body, resp3)
    while resp3 and resp3["hits"]["hits"]:
        after_search["search_after"] = search_after_body.get("search_after")
        search_after_body, resp3 = es.search_after(index_name, after_search)

    # Paging, variant 3-2: same idea with an explicit cursor variable.
    # Drop the cursor left behind by variant 3-1; otherwise this loop would
    # start after the last document and stop immediately.
    after_search.pop("search_after", None)
    search_after = None
    while True:
        if search_after:
            after_search["search_after"] = search_after
        search_after_body, resp3 = es.search_after(index_name, after_search)

        if not resp3["hits"]["hits"]:
            break

        search_after = search_after_body["search_after"]