目录
一、rest api
1. 新建索引
请求方式:PUT
请求URL:http://ip/(your_index_name)
示例URL:http://ip/test_log_20240710-0
数据参数:
params = {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"aliases": {
"test_log":{}
},
"mappings": {
"properties": {
"create_time": {
"type": "date"
},
"status":{"type": "integer"},
"dev_ip":{"type": "ip"},
"dev_uuid":{"type": "keyword"},
"user": {
"properties": {
"name": {
"type": "text"
},
"age": {
"type": "integer"
},
"email": {
"type": "keyword"
}
}
},
"infos": {
"type": "nested",
"dynamic": false,
"properties": {
"v_id": {"type": "keyword"},
"v_name": {"type": "keyword"},
"v_desc": {"type": "text"}
}
},
"remark": {"type": "text"},
"show_num": {"type": "long", "doc_values": false, "index": false}
}
}
}
"""
字段类型:
text:用于全文本搜索,如文章内容。text字段会被分析器分词,支持模糊搜索和前缀搜索。
keyword:用于不进行分词的搜索,适合存储关键词、ID、标签等。keyword字段不会被分词,适用于精确匹配和聚合。
integer/long/short/byte/double/float/half_float/scaled_float:用于数值类型的数据,不同的类型对应不同的数值范围和精度。
boolean:布尔型字段,值只能是true或false。
binary:用于存储二进制数据,如图像或文件。
date:日期时间字段,可以使用ISO8601格式的字符串或毫秒级的时间戳。
date_nanos:纳秒精度的日期时间字段,存储为纳秒时间戳。
ip:用于存储IPv4或IPv6地址。
object:用于嵌套的JSON对象。可以定义内部字段的映射。
nested:用于存储复杂结构的嵌套文档,允许对嵌套文档进行独立索引和搜索。
geo_point:用于地理坐标,支持基于地理位置的查询。
geo_shape:用于地理形状,如多边形,支持更复杂的地理查询。
completion:用于自动补全功能,存储经过分析的词条列表。
constant_keyword:keyword的特例,用于同一索引中所有文档取值都相同的字段;该值只在映射中保存一份,按该字段过滤时几乎没有开销。
token_count:用于存储分词后的词数。
"""
"""
show_num 字段
类型:long
doc_values:false
这意味着 Elasticsearch 不会为这个字段创建 doc values。Doc values 是一种列式存储结构,排序、聚合以及在脚本中读取字段值都依赖它。由于 show_num 字段被设置为 doc_values: false,它不能用于排序和聚合;对数值类型字段执行这类操作会直接报错,而不只是性能下降。
index:false
这表示 show_num 字段不会被索引。索引是 Elasticsearch 中用于快速检索数据的数据结构。由于这个字段没有被索引,它不能作为搜索查询中的查询或过滤条件(能否排序则取决于 doc values,本字段同样已关闭)。基本上,这个字段在存储后,只能通过 _source 字段(即原始 JSON 文档)进行访问。
用途:show_num 字段可能用于存储一些不需要进行搜索、排序或聚合操作的数值数据。它可能只是作为记录的一部分被存储和检索。
infos 字段
类型:nested
nested 类型允许对象数组中的每个对象被索引为独立的文档,但保留在原始文档中的嵌套结构。这对于需要独立查询嵌套对象中的字段,同时保持它们与父文档的关系的情况非常有用。
dynamic:false
这意味着 infos 下未在映射中声明的新属性不会被动态加入映射,也不会被索引(无法搜索);但写入不会报错,这些属性仍会保留在 _source 中。需要被搜索的属性都必须在映射中明确声明;如果希望遇到未声明的属性时直接拒绝写入,应使用 dynamic: strict。
"""
2. 删除索引
请求方式:DELETE
请求URL:http://ip/(your_index_name)
示例URL:http://ip/test_log_20240710-0
3. 插入单条数据
请求方式:POST
请求URL:http://ip/(your_index_name)/_doc
示例URL:http://ip/test_log_20240710-0/_doc
数据参数:
{
"create_time": 1720601022255,
"status": 201,
"dev_ip": "192.168.1.101",
"dev_uuid": "123e4567e89b12d3a456426614174000",
"user": {
"name": "战三",
"age": 30,
"email": "zhansan@example.com"
},
"infos": [
{
"v_id": "123e4567e89b12d3a456426614174000",
"v_name": "战三",
"v_desc": "描述啦啦啦啦啦啦"
}
],
"remark": "描述!!!!!",
"show_num": 6789
}
4. 更新单条数据
请求方式:PUT
请求URL:http://ip/(your_index_name)/_doc/(本条记录id)
示例URL:http://ip/test_log_20240710-0/_doc/KjjOm5ABJ5wlHmqgfJvm
数据参数:
{
"create_time": "2023-04-01T12:00:00Z",
"status": 200,
"dev_ip": "192.168.1.100",
"dev_uuid": "123e4567-e89b-12d3-a456-426614174000",
"user": {
"name": "阿汉",
"age": 30,
"email": "john.doe@example.com"
},
"infos": [
{
"v_id": "info1",
"v_name": "Info One",
"v_desc": "This is the description of info one."
},
{
"v_id": "info2",
"v_name": "Info Two",
"v_desc": "This is the description of info two."
}
],
"remark": "Additional remarks about this log entry.",
"show_num": 12345
}
5. 删除单条数据
请求方式:DELETE
请求URL:http://ip/(your_index_name)/_doc/(本条记录id)
示例URL:http://ip/test_log_20240710-0/_doc/KjjOm5ABJ5wlHmqgfJvm
数据参数:无
6. 查询数据
请求方式:POST
请求URL:http://ip/(your_index_name)/_search
示例URL:http://ip/test_log_20240710-0/_search
数据参数:
{
"size": 10,
"query": {
"bool": {
"must": [
{
"term": {
"status": 200
}
}
]
}
}
}
二、python elasticsearch库
1. 工具类
复制出来就能用,elasticsearch使用的6.x版本已调试
#! -*- coding:utf-8 -*
import time
from elasticsearch import Elasticsearch, helpers
class EstUtil(object):
    """Singleton convenience wrapper around an ``Elasticsearch`` client.

    Every ``EstUtil(...)`` call returns the same instance, and the underlying
    client is created only once (on the first call).  The wrapper targets the
    typed ``_doc`` API used by the 6.x client the original code was debugged
    with, so document-level calls send the ``_doc`` type explicitly.
    """

    _instance = None
    # Document type sent with every typed document call (single and bulk).
    _DOC_TYPE = '_doc'

    def __new__(cls, *args, **kwargs):
        # Constructor arguments are deliberately NOT forwarded:
        # object.__new__ raises TypeError on extra arguments when __new__ is
        # overridden, which would make ``EstUtil(hosts=[...])`` impossible.
        if cls._instance is None:
            cls._instance = super(EstUtil, cls).__new__(cls)
        return cls._instance

    def __init__(self, hosts=None):
        """
        Create the shared client on first construction.

        :param hosts: optional node list passed to ``Elasticsearch``; ``None``
            keeps the client's default.  Only the first construction uses it,
            later ``EstUtil(...)`` calls reuse the existing client.
        """
        # __init__ runs on every EstUtil() call even though __new__ returns
        # the shared instance; without this guard each call would replace the
        # client with a brand-new one.
        if getattr(self, '_initialized', False):
            return
        self.es = Elasticsearch(hosts=hosts, timeout=300, max_retries=3)
        self._initialized = True

    def is_exists_index(self, index_name):
        """
        Check whether an index exists.

        :param index_name: index name
        :return: result of ``indices.exists``
        """
        return self.es.indices.exists(index=index_name)

    def get_all_indices(self, index_=None):
        """
        List indices.

        :param index_: optional index name/pattern
        :return: the index names (keys of ``indices.get_alias()``) when
            ``index_`` is empty, otherwise the ``cat.indices`` JSON rows
            (status information) for ``index_``
        """
        if not index_:
            # Names only.
            return self.es.indices.get_alias().keys()
        # Per-index information such as health and status.
        return self.es.cat.indices(index=index_, format="json")

    def index(self, index_name, body):
        """
        Insert a single document.

        :param index_name: index name
        :param body: document as a dict, e.g. ``{"name": "aaa", "age": 18}``
        :return: True when the response result is ``created``, else False
        """
        response = self.es.index(index=index_name, body=body, doc_type=self._DOC_TYPE)
        return response['result'] == 'created'

    def batch_insert(self, index_name, body_list):
        """
        Bulk-insert documents.

        :param index_name: index name
        :param body_list: list of document dicts
        :return: return value of ``helpers.bulk``
        """
        actions = [
            {"_index": index_name, "_type": self._DOC_TYPE, '_source': data}
            for data in body_list
        ]
        return helpers.bulk(self.es, actions)

    def update(self, index_name, doc_id, update_body):
        """
        Update a single document.

        :param index_name: index name
        :param doc_id: document id
        :param update_body: update request body, e.g. ``{"doc": {...}}``
        :return: True when the response result is ``updated``, else False
            (any other result, such as an unchanged document, gives False)
        """
        response = self.es.update(index=index_name, id=doc_id, body=update_body,
                                  doc_type=self._DOC_TYPE)
        return response['result'] == 'updated'

    @classmethod
    def _build_update_actions(cls, index_name, body_list):
        """Build bulk ``update`` actions without modifying the caller's dicts."""
        actions = []
        for data in body_list:
            # Work on a shallow copy: popping "id" from the caller's dict
            # would silently strip the id from the input list.
            doc = dict(data)
            doc_id = doc.pop("id")
            actions.append({
                "_op_type": "update",
                "_index": index_name,
                "_type": cls._DOC_TYPE,
                "_id": doc_id,
                'doc': doc,
            })
        return actions

    @classmethod
    def _build_delete_actions(cls, index_name, body_list):
        """Build bulk ``delete`` actions from dicts carrying an ``id`` key."""
        return [
            {
                "_op_type": "delete",
                "_index": index_name,
                "_type": cls._DOC_TYPE,
                "_id": data.get("id"),
            }
            for data in body_list
        ]

    def batch_update(self, index_name, body_list):
        """
        Bulk partial update.

        :param index_name: index name
        :param body_list: list of dicts; each must contain ``id`` (the document
            id, KeyError if missing), the remaining keys are the fields to
            update.  The input dicts are left untouched.
        :return: return value of ``helpers.bulk``
        """
        return helpers.bulk(self.es, self._build_update_actions(index_name, body_list))

    def delete(self, index_name, doc_id):
        """
        Delete a single document.

        :param index_name: index name
        :param doc_id: document id
        :return: True when the response result is ``deleted``, else False
        """
        # Pass the type explicitly, like index()/update() do, so the call is
        # also valid for clients where doc_type is a required argument.
        response = self.es.delete(index=index_name, id=doc_id, doc_type=self._DOC_TYPE)
        return response['result'] == 'deleted'

    def batch_delete(self, index_name, body_list):
        """
        Bulk delete.

        :param index_name: index name
        :param body_list: list of dicts, each carrying the document id under ``id``
        :return: return value of ``helpers.bulk``
        """
        return helpers.bulk(self.es, self._build_delete_actions(index_name, body_list))

    def delete_index(self, index_name):
        """
        Delete one index.

        :param index_name: index name
        :return: the ``acknowledged`` flag of the response
        """
        return self.es.indices.delete(index=index_name)['acknowledged']

    def create_index(self, index_name, field_type_dict, number_of_shards, number_of_replicas):
        """
        Create an index from a flat ``{field name: field type}`` mapping.

        :param index_name: index name
        :param field_type_dict: dict of field name -> Elasticsearch type
        :param number_of_shards: number of primary shards
        :param number_of_replicas: number of replicas
        :return: response of ``indices.create``
        :raises ValueError: if the index already exists
        """
        if self.is_exists_index(index_name):
            raise ValueError('索引已存在:%s' % index_name)
        properties = dict(
            (field, {'type': field_type})
            for field, field_type in field_type_dict.items()
        )
        body = {
            'settings': {
                'number_of_shards': number_of_shards,
                'number_of_replicas': number_of_replicas,
            },
            # Typed mapping: the properties live under the "_doc" type.
            'mappings': {self._DOC_TYPE: {'properties': properties}},
        }
        return self.es.indices.create(index=index_name, body=body)

    def create_index_by_body(self, index_name, body):
        """
        Create an index from a fully assembled request body.

        :param index_name: index name
        :param body: complete create-index dict (settings / aliases / mappings)
        :return: response of ``indices.create``
        :raises ValueError: if the index already exists

        Example body::

            {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "aliases": {"test_log": {}},
                "mappings": {
                    "properties": {
                        "create_time": {"type": "date"},
                        "status": {"type": "integer"},
                        "dev_ip": {"type": "ip"},
                        "dev_uuid": {"type": "keyword"},
                        "user": {
                            "properties": {
                                "name": {"type": "text"},
                                "age": {"type": "integer"},
                                "email": {"type": "keyword"}
                            }
                        },
                        "infos": {
                            "type": "nested",
                            "dynamic": False,
                            "properties": {
                                "v_id": {"type": "keyword"},
                                "v_name": {"type": "keyword"},
                                "v_desc": {"type": "text"}
                            }
                        },
                        "remark": {"type": "text"},
                        "show_num": {"type": "long", "doc_values": False, "index": False}
                    }
                }
            }
        """
        if self.is_exists_index(index_name):
            raise ValueError('索引已存在:%s' % index_name)
        return self.es.indices.create(index=index_name, body=body)

    def search(self, index_name, request_body):
        """
        Run a query.

        :param index_name: index name
        :param request_body: query DSL dict
        :return: raw search response
        """
        return self.es.search(index=index_name, body=request_body, timeout="5m")

    def search_page(self, index_name, request_body, page, size):
        """
        Paged query using from/size (deep paging beyond 10000 hits may cause
        performance problems; prefer search_after or scroll there).

        :param index_name: index name
        :param request_body: query DSL dict
        :param page: 1-based page number
        :param size: hits per page
        :return: raw search response
        """
        offset = (page - 1) * size
        return self.es.search(index=index_name, body=request_body, from_=offset, size=size)

    def search_after(self, index_name, search_after_body):
        """
        Paged query driven by the sort values of the previous page's last hit.

        ``search_after_body`` must contain ``size`` and ``sort``; from the
        second request on it must also contain ``search_after`` set to the
        value returned by the previous call.  The sort must be unique per
        document, otherwise pages can repeat or skip hits (a timestamp alone
        may collide), so sort on several fields or add a unique one such as
        ``_id``, e.g.
        ``{"size": 1, "sort": [{"create_time": {"order": "desc"}}, {"_id": {"order": "asc"}}]}``.

        :param index_name: index name
        :param search_after_body: query DSL dict
        :return: ``(cursor, response)`` where ``cursor`` is
            ``{'search_after': <sort values of the last hit>}``, or
            ``{'search_after': []}`` when the page is empty
        """
        response = self.es.search(index=index_name, body=search_after_body)
        hits = response.get('hits').get('hits')
        cursor = {'search_after': hits[-1].get('sort') if hits else []}
        return cursor, response

    def search_scroll(self, index_name, scroll, body):
        """
        Start a scroll (cursor-based paging over a snapshot of the results).

        :param index_name: index name
        :param scroll: how long to keep the scroll alive, e.g. ``"1m"``
        :param body: query DSL dict
        :return: ``(scroll_id, response)`` for the first page
        """
        response = self.es.search(index=index_name, scroll=scroll, body=body)
        return response['_scroll_id'], response

    def search_next(self, scroll_id, scroll):
        """
        Fetch the next page of a scroll started with ``search_scroll``.

        :param scroll_id: scroll id returned by the previous call
        :param scroll: keep-alive for the scroll, e.g. ``"1m"``; the request
            fails once the scroll has expired
        :return: ``(scroll_id, response)`` for the next page
        """
        response = self.es.scroll(scroll_id=scroll_id, scroll=scroll)
        return response['_scroll_id'], response
if __name__ == '__main__':
    # End-to-end demo of EstUtil against a reachable Elasticsearch node.
    es = EstUtil()
    create_index_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "aliases": {
            "test_log": {}
        },
        "mappings": {
            "properties": {
                "create_time": {
                    "type": "date"
                },
                "status": {"type": "integer"},
                "dev_ip": {"type": "ip"},
                "dev_uuid": {"type": "keyword"},
                "user": {
                    "properties": {
                        "name": {
                            "type": "text"
                        },
                        "age": {
                            "type": "integer"
                        },
                        "email": {
                            "type": "keyword"
                        }
                    }
                },
                "infos": {
                    "type": "nested",
                    "dynamic": False,
                    "properties": {
                        "v_id": {"type": "keyword"},
                        "v_name": {"type": "keyword"},
                        "v_desc": {"type": "text"}
                    }
                },
                "remark": {"type": "text"},
                "show_num": {"type": "long", "doc_values": False, "index": False}
            }
        }
    }
    index_name = "test_data_index"

    # Create the index only when it is missing: create_index_by_body raises
    # ValueError for an existing index, which would abort every re-run.
    if not es.is_exists_index(index_name):
        create_respone = es.create_index_by_body(index_name, create_index_body)
        print(create_respone)
        # e.g. {u'index': u'test_data_index', u'acknowledged': True, u'shards_acknowledged': True}

    # List all indices.
    indices = es.get_all_indices()
    print(indices)

    # Insert a single document.
    insert_body = {
        "create_time": 1720601022255,
        "status": 201,
        "dev_ip": "192.168.1.101",
        "dev_uuid": "123e4567e89b12d3a456426614174000",
        "user": {
            "name": "战三",
            "age": 30,
            "email": "zhansan@example.com"
        },
        "infos": [
            {
                "v_id": "123e4567e89b12d3a456426614174000",
                "v_name": "战三",
                "v_desc": "描述啦啦啦啦啦啦"
            }
        ],
        "remark": "描述!!!!!",
        "show_num": 6789
    }
    insert_resp = es.index(index_name, insert_body)
    print(insert_resp)
    # Newly indexed documents only become searchable after a refresh.
    es.es.indices.refresh(index=index_name)

    # Update a document / dynamically add a field (send_time is not part of
    # the mapping and is added by the update).  The id is looked up from the
    # index rather than hard-coded, since ids are generated per insert.
    send_time = int(time.time())
    update_body = {
        "doc": {
            "send_time": send_time
        }
    }
    existing_hits = es.search(index_name, {"size": 1})["hits"]["hits"]
    if existing_hits:
        update_resp = es.update(index_name, existing_hits[0]["_id"], update_body)
        print(update_resp)
        es.es.indices.refresh(index=index_name)

    # Basic query.
    resp2 = es.search(index_name, {"size": 10, "from": 1})
    print(resp2)

    # Paging 1: from/size, filtering on the send_time written above.
    search_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "send_time": send_time
                        }
                    }
                ]
            }
        }
    }
    resp1 = es.search_page(index_name, search_body, 1, 1)
    print(resp1)

    # Paging 2: scroll.
    scroll = "1m"
    scroll_id, response = es.search_scroll(index_name, scroll, {"size": 10})
    print(scroll_id, response)
    while response["hits"]["hits"]:
        scroll_id, response = es.search_next(scroll_id, scroll)
        print(scroll_id, response)
    # Release the scroll context now instead of waiting for it to expire.
    es.es.clear_scroll(scroll_id=scroll_id)

    # Paging 3-1: search_after, first style.
    after_search = {"size": 1, "sort": [{"create_time": {"order": "desc"}}, {"_id": {"order": "asc"}}]}
    search_after_body, resp3 = es.search_after(index_name, after_search)
    print(search_after_body, resp3)
    while resp3 and resp3["hits"]["hits"]:
        after_search["search_after"] = search_after_body.get("search_after")
        search_after_body, resp3 = es.search_after(index_name, after_search)

    # Paging 3-2: search_after, second style.
    # Drop the cursor left over from 3-1; otherwise the first request below
    # would start after the last document and the loop would end immediately.
    after_search.pop("search_after", None)
    search_after = None
    while True:
        if search_after:
            after_search["search_after"] = search_after
        search_after_body, resp3 = es.search_after(index_name, after_search)
        if not resp3["hits"]["hits"]:
            break
        search_after = search_after_body["search_after"]