ElasticSearch Index查询(Query DSL)

先贴一个Query DSL的官方文档:https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
我平时喜欢查看官方文档,了解数据查询和存储方面的性能优化点,下面是积累的脚本分享。

  1. 查询语句格式
GET /索引名/_search
{
  "query": {
    "查询类型": {
      "查询条件": "查询条件值"
    }
  }
}
  • 查询类型:match_all,match,term,range,fuzzy,bool 等等
  • 查询条件:查询条件会根据类型的不同,写法也有差异
  1. 分词匹配 match
    match进行搜索的时候,会先分词再匹配。通过operator指定多个分词之间的逻辑关系
{
  "query": {
    "match": {
      "nickname": {
        "query": "大山",
        "operator": "and"  // 默认值为or
      }
    }
  }
}

// 下面查询是上面查询的简写
{
  "query": {
    "match": {
      "nickname": "大山"
    }
  }
}
  1. 短语匹配match_phrase
    匹配含有短语中所有单词的文档,且单词之间没有插入别的单词。等价于旧版本(6.0之前)match中指定"type": "phrase"。通过slop值可以指定单词之间允许的最大间隔。
{
  "query": {
    "match_phrase": {
      "nickname": {
        "query": "大山"
      }
    }
  }
}
  1. 单项目匹配term
    term查询不会对提供的查询值分词,而ES在写入时默认会用standard分词器对文档中text类型field的值做分词。因此,在standard分词下,term检索的值必须是不可再拆分的单个词项才能匹配出结果。例如,对于汉字,只支持单个汉字的匹配;对于英文单词,只支持单个单词的匹配,也就是中间不能有空格。也因此term适合数字、boolean、date、数字字母字符串的精确匹配;
{
  "query": {
    "term": {
      "author_id": "101572313821"
    }
  },
  "sort": [
    {
      "crawl_time": {
        "order":  "desc"
      }
    }
  ],
  "size": 30
}
  1. 单项目多值匹配terms
// 至少一个值在field中存在,相当于SQL中的IN(terms不会对查询值分词;standard分词器会把词项转成小写,所以查询值要用小写)
{
  "query": {
    "terms": {
      "title": ["love", "china"]
    }
  }
}
// 多个值都必须在field中存在
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "title": "love"
          }
        },
        {
          "term": {
            "title": "china"
          }
        }
      ]
    }
  }
}

  1. 逻辑运算should和must和must_not
    must所有条件都必须满足
    must_not 所有条件都不能满足(只要满足其中任意一个条件,文档就会被排除)
    should:至少满足一个就可以
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "crawl_time": {
              "gte": "2021-03-29 00:00:00",
              "lte": "2021-03-31 00:00:00"
            }
          }
        },
        {
          "match_phrase": {
            "author_id": "70577810926"
          }
        }
      ]
    }
  },
  "sort": [
    {
      "crawl_time": {
        "order": "desc"
      }
    }
  ]
}
  1. 不计算score的filter
    使用 filter 来替代 must 查询,和must查询结果是一致的,差异仅是没有相关性得分,可以提高查询效率。建议了解下ES计算Score的查询实现。
{
"query": {
    "bool": {
      "filter": {
        "term": {"id": "13"}
      }
    }
 }
}

{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "ctime": {
              "gte": "2021-03-28 00:00:00",
              "lt": "2021-04-01 00:00:00"
            }
          }
        },
        {
          "term": {
            "verify_name": "山西大昌联品汽车销售服务有限公司"
          }
        }
      ]
    }
  }
}

// constant_score 会给所有匹配filter的文档一个固定的分数(这里通过 boost 指定为 2,默认不填是 1)
{
  "query": {
    "constant_score": {
      "filter": {
        "match": {
          "title": "火锅"
        }
      },
      "boost": 2
    }
  }
}
  1. 多域匹配multi_match
    有一个fields匹配就算成功
{
  "query": {
    "multi_match": {
      "query": "串串",
      "fields": [ "title", "tag"]
    }
  }
}
  1. 查询字符串query_string
{
  "query": {
    "query_string": {
      "query": "(水煮肉 AND 回锅肉) OR 西葫芦"
    }
  }
}

{
  "query": {
    "query_string": {
      "query": "中国声音",
      "analyzer": "ik_max_word", 
      "fields": ["name","content"]
    }
  }
}
  1. 查询Object类型
{
    "query": {
        "term": {
            "language.v4.keyword": "Spanish"
        }
    }
}
  1. 查询内嵌类型
    字段类型必须设置"type": "nested"
{
  "query": {
    "nested": {
      "path": "series",
      "query": {
        "match": {
          "series.series_name": "车"
        }
      }
    }
  }
}
  1. 查询Array的长度
GET /aweme-comment-dev/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "script": {
            "script": {
              "source": "doc['hot_words.word'].length==2",
              "lang": "painless"
            }
          }
        }
      ]
    }
  }
}
  1. 精确获取Hit的数量
GET /aweme-comment-dev/_search
{
  "track_total_hits": true,
  "query": {
    "bool": {
      "filter": {
        "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
      }
    }
  }
}
  1. 复杂Painless查询
GET /aweme-info-pro/_search?timeout=10000m
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "statistics_day": {
              "gte": "2021-06-01",
              "lte": "2021-08-01"
            }
          }
        }
      ], 
      "filter": {
        "script": {
          "script": "if (doc['publish_time'].size() != 0 && doc['statistics_day'].size() !=0 ){ doc['statistics_day'].value.toLocalDate().isBefore(doc['publish_time'].value.toLocalDate())} else {return false}"
        }
      }
    }
  }
}
  1. 两个字段间比较
GET /fans-pro/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "fans_id"
          }
        }
      ],
      "filter": [
        {
          "script": {
            "script": {
              "source": "doc['_id'].value != doc['fans_id'].value",
              "lang": "painless"
            }
          }
        }
      ]
    }
  },
	"sort": [
	  {
	    "mtime": {
	      "order": "desc"
	    }
	  }
	]
}
  1. 只显示指定的字段 _source
GET /aweme-comment-dev/_search
{
  "_source": "fans_id",
  "query": {
    "bool": {
      "filter": {
        "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
      }
    }
  }
}
  1. 通过includes和excludes指定需要和不需要的字段
{
  "_source": {
    "includes": [ "obj1.*", "obj2.*" ],
    "excludes": [ "*.description" ]
  },
  "query": {
    "term": {
      "user.id": "kimchy"
    }
  }
}
  1. 其他请求参数
// 显示版本号
"version": true

// 显示评分计算的详细解释(同时返回命中文档所在的分片和节点)
"explain": true
  1. 文档数量查询
GET /aweme-info-dev/_count

GET /author-info-dev/_count
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  }
}
  1. 聚合查询
    Bucket可以理解为一个桶,它会遍历文档中的内容,凡是符合某一要求的就放在一个桶中,分桶相当于sql中的group by, 关键字有Terms Aggregation,Filter Aggregation,Histogram Aggregation, Date Aggregation。
GET /aweme-comment-dev/_search
{
  "query": {
    "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
  },
  "size": 0, 
  "aggs": {
    "count_fans": {
      "value_count": {
        "field": "fans_id"
      }
    }
  }
}

GET /author-info-dev/_search
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  },
  "size": 0, 
  "aggs": {
    "crawl_time_stats": {
      "stats": {
        "field": "crawl_time"
      }
    }
  }
}

聚合类型包含
min: Computes the minimum value for a group of buckets.
max: Computes the maximum value for a group of buckets.
avg: Computes the average value for a group of buckets.
sum: Computes the sum of all the buckets.
value_count: Computes the count of values in the bucket.
stats: Computes all the base metrics such as the min, max, avg, count, and sum.
extended_stats: Computes the stats metric plus variance, standard deviation (std_deviation), bounds of standard deviation (std_deviation_bounds), and sum of squares (sum_of_squares).
percentiles: Computes the percentiles (the point at which a certain percentage of observed values occur) of some values (see Wikipedia at http://en.wikipedia.org/wiki/Percentile for more information about percentiles).
percentile_ranks: Computes the rank of values that hit a percentile range.
cardinality: Computes an approximate count of distinct values in a field.
geo_bounds: Computes the maximum geobounds in the document where the GeoPoints are.
geo_centroid: Computes the centroid in the document where GeoPoints are.

  1. 按值聚合
GET /aweme-info-dev/_search
{
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "size": 0,
  "aggs": {
    "aweme_id_agg": {
      "terms": {
        "field": "author_id",
        "size": 100   # 按文档数量doc_count字段倒排序后并取前100个author_id
      },
      "aggs": {
        "total_digg_count": {
          "sum": {
            "field": "digg_count_di"
          }
        },
        "avg_digg_count": {
          "avg": {
            "field": "digg_count_di"
          }
        },
        "agg_digg_count": {
          "terms": {
            "field": "digg_count_di"
          }
        }
      }
    }
  }
}

修改terms的排序方式:"order": {"_count": "asc"}
指定terms中doc_count的最小数量:"min_doc_count": 3
指定terms中需要统计的值:"include": ["BMW", "Audi"]

GET /aweme-info-dev/_search
{
  "size": 0,  
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "aggs": {
    "最小": {
      "min": {
        "field": "digg_count"
      }
    },
    "最大": {
      "max": {
        "field": "digg_count"
      }
    },
    "平均值": {
      "avg": {
        "field": "digg_count"
      }
    },
    "求和": {
      "sum": {
        "field": "digg_count"
      }
    },
    "去重count": {
      "cardinality": {
        "field": "author_id"
      }
    },
    "分组去重": {
      "terms": {
        "field": "aweme_id",
        "size": 10
      },
      "aggs": {
        "组内去重": {
          "cardinality": {
            "field": "author_id"
          }
        },
        "组内求和": {
          "sum": {
            "field": "digg_count"
          }
        }
      }
    }
  }
}
  1. 按值范围聚合
GET /author-info-dev/_search
{
  "track_total_hits": true,
  "size": 0, 
  "aggs": {
    "digg_distribute": {
      "range": {
        "field": "comment_count",
        "ranges": [
          {"to": 500},
          {"from": 501, "to": 1000},
          {"from": 1001}
        ]
      }
    }
  }
}

GET /cars/cars/_search
{
  "aggs": {
    "range": {
      "date_range": {
        "field": "sellTime",
        "format": "yyyy", 
        "ranges": [
          {
            "from": "2014",
            "to": "2019"
          }
        ]
      }
    }
  }
}
  1. 直方图聚合(按值间隔聚合)
GET /cars/cars/_search
{
  "aggs": {
    "prices": {
      "histogram": {
        "field": "price",
        "interval": 10000
      }
    }
  }
}

GET /cars/cars/_search
{
  "aggs": {
    "sales_over_time": {
      "date_histogram": {
        "field": "sellTime",
        "interval": "month",
        "format": "yyyy-MM-dd"
      }
    }
  }
}

GET /fans-follow-pro/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "crawl_time": {
            "gte": "2021-04-12 00:00:00",
            "lte": "2021-04-21 00:00:00"
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "range": {
      "date_histogram": {
        "field": "crawl_time",
        "format": "yyyy-MM-dd",
        "interval": "day"
      }
    }
  }
}
  1. 分组排序取第一
GET /aweme-info-dev/_search
{
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "size": 0,
  "aggs": {
    "aweme_id_agg": {
      "terms": {
        "field": "author_id",
        "order": {
          "_count": "asc"     
        },
        "size": 10    
      },
      "aggs": {
        "NAME": {
          "top_hits": {
            "size": 1, 
            "sort": {"publish_time": "desc"},  # 按publish_time倒排序
            "_source": ["publish_time", "ctime"] 
          }
        }
      }
    }
  }
}


GET /dongchedi-live-info-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-08-19 00:00:00",
        "gte": "2021-08-18 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "terms": {
        "field": "room_id",
        "size": 10,
        "order": {
          "_term": "asc"
        }
      },
      "aggs": {
        "NAME": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "crawl_time": {
                  "order": "desc"
                }
              }
            ],"_source": ["crawl_time", "room_id", "room_status", "finish_time"]
          }
        }
      }
    }
  },
  "size": 0
}
  1. Group by + Distinct + Count
GET /dongchedi-live-polling-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-08-18 00:00:00",
        "gte": "2021-08-01 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "date_histogram": {
        "field": "crawl_time",
        "interval": "day"
      },"aggs": {
        "doc_count": {
          "cardinality": {
            "field": "msg_id"
          }
        }
      }
    }
  }
}


GET /dongchedi-live-polling-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-09-23 00:00:00",
        "gte": "2021-08-18 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "date_histogram": {
        "field": "crawl_time",
        "interval": "day"
      },
      "aggs": {
        "abc": {
          "cardinality": {
            "precision_threshold": 40000, # 当统计结果小于配置的 precision_threshold 时,结果基本准确;反之,则统计结果可能会有误差。最大为40000
            "script": {
              "source": "doc['room_id'].value + ' ' + doc['msg_id'].value"
            }
          }
        }
      }
    }
  }
}
  1. Group by + Having+Count + Distinct
GET /aweme-info-pro/_search
{
  "size": 0,
  "aggs": {
    "NAME": {
      "terms": {
        "field": "item_id",
        "size": 200,
        "order": {
          "amount": "desc"
        }
      },
      "aggs": {
        "amount": {
          "cardinality": {
            "field": "author_id"
          }
        },
        "having": {
          "bucket_selector": {
            "buckets_path": {
              "amount": "amount"
            },
            "script": {
              "source": "params.amount >= 2 "
            }
          }
        }
      }
    }
  }
}
  1. Collapse实现Distinct
GET /author-info-dev/_search
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  },
  "collapse": {
    "field": "author_id"
  }
}
  1. 通配符wildcard和模糊匹配fuzzy
    wildcard和fuzzy在standard分词器下都无法匹配多个汉字(standard分词器把汉字逐字拆成单个词项,而这两种查询是针对单个词项进行匹配的)
GET /author-info-dev/_search
{
  "query": {
    "bool": {
      "filter": {
        "wildcard": {
          "nickname": {
            "value": "姐"
          }
        }
      }
    }
  },
  "_source": "nickname"
}

GET /author-info-dev/_search
{
  "query": {
    "bool": {
      "filter": {
        "wildcard": {
          "nickname": {
            "value": "*姐*"
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "distinct_nickname": {
      "terms": {
        "field": "city_id",
        "size": 100
      }
    }
  }
}

GET /author-info-dev/_search
{
  "query": {
    "fuzzy": {
      "signature":"老司机"
    }
  }
}

如果signature字段在mapping中没有建立索引(index为false),上面的fuzzy查询会报错:Cannot search on field [signature] since it is not indexed.

  1. 前缀查询Prefix
    prefix在standard分词器也无法匹配多个汉字
GET /author-info-dev/_search
{
  "query": {
    "prefix": {
      "author_id": {
        "value": "2"
      }
    }
  },
  "_source": "author_id"
}
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值