自己在项目中用到的一些东西。分享一下
查询所有
GET index_search_model/_search
{
"query": {
"match_all": {}
}
}
基本条件查询
GET index_search_model/_search
{
"query": {
"match": {
"search_type": 1
}
},
# 取消最大10000条限制
"track_total_hits": true
}
分页+区间+条件
GET index_search_model/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"content_name": {
"query": "一切"
}
}
}
]
},
"range": {
"search_type": {
"gte": 3,
"lte": 4
}
}
},
"_source": ["content_id","content_name","search_type"],
"from": 0,
"size": 2
}
in操作
{
"terms": {
"user_id": ["user_id1", "user_id2"]
}
}
分组聚合
GET index_xxx/_search
{
"size": 0,
"query": {
"bool": {
"must": [{
"range": {
"create_time": {
"gte": 1602724120662,
"lte": 1602866828039
}
}
}]
}
},
"aggs": {
"group_by_type": {
"terms": {
"field": "ad_type"
},
"aggs": {
"group_by_position": {
"terms": {
"field": "ad_position"
},
"aggs": {
"group_by_pv_uv_type": {
"terms": {
"field": "pv_uv_page_type"
},
"aggs":{
"group_by_time":{
"date_histogram":{
"field": "create_time",
"interval": "1h",
"min_doc_count": 0
},
"aggs": {
"group_by_pv_uv_type2": {
"cardinality": {
"field": "user_id"
}
},
"pvCount":{
"value_count": {
"field": "pv_uv_page_type"
}
},
"times":{
"field": "create_time"
},
"sales_bucket_sort": {
"bucket_sort": {
"sort": [
{"pvCount": {"order": "desc"}}
],
"from": 0,
"size":4
}
}
}
}
}
}
}
}
}
}
}
}
设置操作
#查看索引设置
GET adanalysis/_settings
#桶聚合后的hits返回数据最大数
PUT bookanalysis/_settings
{
"index.max_inner_result_window" : "9999"
}
#查询结果的最大返回数
PUT bookdetailanalysis/_settings
{
"index.max_result_window" : "2147483647"
}
#查询桶的最大返回数
PUT /_cluster/settings
{
"persistent":{
"search.max_buckets": 100000
}
}
#fielddata 断路器默认设置堆的 60% 作为 fielddata 大小的上限。
PUT /_cluster/settings
{
"persistent": {
"indices.breaker.fielddata.limit": "60%"
}
}
#request 断路器估算需要完成其他请求部分的结构大小,例如创建一个聚合桶,默认限制是堆内存的 40%。
PUT /_cluster/settings
{
"persistent": {
"indices.breaker.request.limit": "40%"
}
}
#total 揉合 request 和 fielddata 断路器保证两者组合起来不会使用超过堆内存的 70%。
PUT /_cluster/settings
{
"persistent": {
"indices.breaker.total.limit": "70%"
}
}
indices.breaker.fielddata.limit 默认值是JVM堆内存的60%,
注意为了让设置正常生效,一定要确保 indices.breaker.fielddata.limit 的值
大于 indices.fielddata.cache.size 的值。
否则的话,fielddata 大小一到 limit 阈值就报错,就永远到不了 size 阈值,
无法触发对旧数据的换出(淘汰)任务了。
# 索引刷新时间,-1取消刷新时间。 例如:5s
PUT /adanalysisnew/_settings
{
"refresh_interval": -1
}
# 修改索引副本数
PUT /adanalysisdata/_settings
{
"number_of_replicas":1
}
删除操作
# 参数解释如下:
# wait_for_completion=false : 开启异步
# scroll_size=2000 : 每批滚动(scroll)处理的文档数,并非删除的总文档数
# slices=2 : 线程数量
# conflicts=proceed : 遇到版本冲突时跳过该文档并继续执行,而不是中止整个任务
POST adanalysis/_delete_by_query?wait_for_completion=false&conflicts=proceed&scroll_size=2000&slices=2
{
"query": {
"bool": {
"must": [
{
"range": {
"create_time": {
"gte": 1607529600000,
"lte": 1607702400000
}
}
}
]
}
}
}
其它操作
索引别名
# 单操作
PUT index/_alias/indexbieming
# 多操作
POST /_aliases
{
"actions": [
{ "remove": { "index": "adanalysisnew", "alias": "adanalysisdata" }},
{ "add": { "index": "adanalysisnews", "alias": "adanalysisdata" }}
]
}
创建索引
# number_of_shards 创建分片的数量,在网上查询过分片的大小最好是维持在20G-50G左右
PUT /index/
{
"settings" : {
"index":{
"number_of_shards" : 6, # 分片
"number_of_replicas": 0 # 副本分片
}
},
"properties": {
"create_time": {
"type": "long"
},
"device_id": {
"type": "keyword"
},
"user_id": {
"type": "long"
}
}
}
数据迁移
POST /_reindex?slices=5&wait_for_completion=false
{
"source": {
"index": "source_index",
"size" : 10000
},
"dest": {
"index": "new_index"
}
}
任务操作
# 查看所有的进行中的删除任务
GET _tasks?detailed=true&actions=*/delete/byquery
# 查看所有的进行中的任务
GET _tasks?detailed=true&actions=*
# 查看某个父任务(多线程下使用)
GET /_tasks?parent_task_id=Ljd1T6xxTpG0XEZcsGeGrA:85553680
# 查询某个任务
GET _tasks/N1NRSrCYQzCXE1d4rb0IEQ:444398383
# 取消任务
POST _tasks/N1NRSrCYQzCXE1d4rb0IEQ:444398383/_cancel
删除scroll深分页id
DELETE /_search/scroll/_all
其它优秀文档
Elasticsearch数据类型及其属性: https://www.jianshu.com/p/01f489c46c38
reindex 重建索引:https://blog.csdn.net/winterking3/article/details/108242124