ElasticSearch Index查询（Query DSL）

DataCareer

已于 2023-03-03 13:56:35 修改

阅读量2.2k

点赞数

分类专栏： ElasticSearch 文章标签： elasticsearch 大数据搜索引擎

于 2023-03-03 13:47:01 首次发布

本文链接：https://blog.csdn.net/WMSOK/article/details/129318052

版权

ElasticSearch 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

先贴一个Query DSL的官方文档：https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
我平时喜欢查看官方文档，了解数据查询和存储方面的性能优化点，下面是积累的脚本分享。

查询语句格式

GET /索引名/_search
{
  // The request body wraps exactly one query clause under "query";
  // the clause key is the query type (match, term, range, bool, ...).
  "query": {
    "查询类型": {
      "查询条件": "查询条件值"
    }
  }
}

查询类型：match_all，match，term，range，fuzzy，bool 等等
查询条件：查询条件会根据类型的不同，写法也有差异

分词匹配 match
match进行搜索的时候，会先分词再匹配。通过operator指定多个分词之间的逻辑关系

{
  "query": {
    "match": {
      "nickname": {
        "query": "大山",
        "operator": "and"  // 默认值为or
      }
    }
  }
}

// 下面查询是上面查询的简写
{
  "query": {
    "match": {
      "nickname": "大山"
    }
  }
}

短语匹配match_phrase
匹配含有短语中所有单词的文档，且单词之间没有插入别的单词。等价于match中指定"type": “phrase”。slop值可以指定临近的距离。

{
  "query": {
    "match_phrase": {
      "nickname": {
        "query": "大山"
      }
    }
  }
}

单项目匹配term
由于term查询时并不会对提供的查询值分词，但ES对文档指定field的值做了standard分词，因此，standard分词时term检索的值必须是不可拆分的才能匹配出结果。例如，对于汉字，只支持单个汉字的匹配；对于英文单词，只支持单个单词的匹配，也就是中间不能有空格。也因此term适合数字、boolean、date、数字字母字符串精确匹配；

{
  "query": {
    "term": {
      "author_id": "101572313821"
    }
  },
  "sort": [
    {
      "crawl_time": {
        "order":  "desc"
      }
    }
  ],
  "size": 30
}

单项目多值匹配terms

// Matches documents whose field contains at least one of the listed values (like SQL IN).
// terms does not analyze its input. Assuming title is a text field indexed with the
// standard analyzer, the stored tokens are lowercased, so the values must be lowercase too.
{
  "query": {
    "terms": {
      "title": ["love", "china"]
    }
  }
}
// 多个值都必须在field中存在
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "title": "love"
          }
        },
        {
          "term": {
            "title": "china"
          }
        }
      ]
    }
  }
}

逻辑运算should和must和must_not
must所有条件都必须满足
must_not 不能同时满足所有条件
should：至少满足一个就可以

{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "crawl_time": {
              "gte": "2021-03-29 00:00:00",
              "lte": "2021-03-31 00:00:00"
            }
          }
        },
        {
          "match_phrase": {
            "author_id": "70577810926"
          }
        }
      ]
    }
  },
  "sort": [
    {
      "crawl_time": {
        "order": "desc"
      }
    }
  ]
}

不计算score的filter
使用 filter 来替代 must 查询，和must查询结果是一致的，差异仅是没有相关性得分，可以提高查询效率。建议了解下ES计算Score的查询实现。

{
  "query": {
    "bool": {
      // filter context: the clause only decides whether a document matches and adds no relevance score
      "filter": {
        "term": {
          "id": "13"
        }
      }
    }
  }
}

{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "ctime": {
              "gte": "2021-03-28 00:00:00",
              "lt": "2021-04-01 00:00:00"
            }
          }
        },
        {
          "term": {
            "verify_name": "山西大昌联品汽车销售服务有限公司"
          }
        }
      ]
    }
  }
}

// filter支持按固定分数查询。（指定 boost 匹配分数是 2 的结果，默认不填是 1）
{
  "query": {
    "constant_score": {
      "filter": {
        "match": {
          "title": "火锅"
        }
      },
      "boost": 2
    }
  }
}

多域匹配multi_match
有一个fields匹配就算成功

{
  "query": {
    "multi_match": {
      "query": "串串",
      "fields": [ "title", "tag"]
    }
  }
}

查询字符串query_string

{
  "query": {
    "query_string": {
      // Boolean operators must be upper case (AND / OR / NOT);
      // lower-case "and" / "or" are parsed as ordinary search terms.
      "query": "(水煮肉 AND 回锅肉) OR 西葫芦"
    }
  }
}

{
  "query": {
    "query_string": {
      "query": "中国声音",
      "analyzer": "ik_max_word", 
      "fields": ["name","content"]
    }
  }
}

查询Object类型

{
    "query": {
        "term": {
            "language.v4.keyword": "Spanish"
        }
    }
}

查询内嵌类型
字段类型必须设置"type": “nested”

{
  "query": {
    "nested": {
      "path": "series",
      "query": {
        "match": {
          "series.series_name": "车"
        }
      }
    }
  }
}

查询Array的长度

GET /aweme-comment-dev/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "script": {
            "script": {
              "source": "doc['hot_words.word'].length==2",
              "lang": "painless"
            }
          }
        }
      ]
    }
  }
}

精确获取Hit的数量

GET /aweme-comment-dev/_search
{
  "track_total_hits": true,
  "query": {
    "bool": {
      "filter": {
        "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
      }
    }
  }
}

复杂Painless查询

GET /aweme-info-pro/_search?timeout=10000m
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "statistics_day": {
              "gte": "2021-06-01",
              "lte": "2021-08-01"
            }
          }
        }
      ],
      "filter": {
        "script": {
          // Keeps documents whose statistics_day is earlier than publish_time (compared by calendar date).
          // Both fields are checked for a value first; documents missing either field are filtered out.
          // Every branch returns explicitly so the boolean result does not rely on implicit-return rules.
          "script": {
            "lang": "painless",
            "source": "if (doc['publish_time'].size() != 0 && doc['statistics_day'].size() != 0) { return doc['statistics_day'].value.toLocalDate().isBefore(doc['publish_time'].value.toLocalDate()); } else { return false; }"
          }
        }
      }
    }
  }
}

两个字段间比较

GET /fans-pro/_search
{
  "query": {
    "bool": {
      // Only documents that actually have a fans_id value are considered.
      "must": [
        { "exists": { "field": "fans_id" } }
      ],
      // Keep documents whose _id differs from their fans_id.
      // NOTE(review): reading _id through doc[...] relies on fielddata for _id, which newer
      // Elasticsearch versions appear to disable by default — confirm on the target cluster.
      "filter": [
        {
          "script": {
            "script": {
              "source": "doc['_id'].value != doc['fans_id'].value",
              "lang": "painless"
            }
          }
        }
      ]
    }
  },
  "sort": [
    { "mtime": { "order": "desc" } }
  ]
}

只显示指定的字段 _source

GET /aweme-comment-dev/_search
{
  "_source": "fans_id",
  "query": {
    "bool": {
      "filter": {
        "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
      }
    }
  }
}

通过includes和excludes指定需要和不需要的字段

{
  "_source": {
    "includes": [ "obj1.*", "obj2.*" ],
    "excludes": [ "*.description" ]
  },
  "query": {
    "term": {
      "user.id": "kimchy"
    }
  }
}

其他请求参数

// 显示版本号
"version": true

// 显示分片信息
"explain": true

文档数量查询

GET /aweme-info-dev/_count

GET /author-info-dev/_count
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  }
}

聚合查询
Bucket可以理解为一个桶，它会遍历文档中的内容，凡是符合某一要求的就放在一个桶中，分桶相当于sql中的group by, 关键字有Terms Aggregation，Filter Aggregation，Histogram Aggregation， Date Aggregation。

GET /aweme-comment-dev/_search
{
  "query": {
    "range": {
          "ctime": {
            "gte": "2021-04-10 00:00:00",
            "lte": "2021-04-10 23:59:59"
          }
        }
  },
  "size": 0, 
  "aggs": {
    "count_fans": {
      "value_count": {
        "field": "fans_id"
      }
    }
  }
}

GET /author-info-dev/_search
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  },
  "size": 0, 
  "aggs": {
    "crawl_time_stats": {
      "stats": {
        "field": "crawl_time"
      }
    }
  }
}

聚合类型包含
min: Computes the minimum value for a group of buckets.
max: Computes the maximum value for a group of buckets.
avg: Computes the average value for a group of buckets.
sum: Computes the sum of all the buckets.
value_count: Computes the count of values in the bucket.
stats: Computes all the base metrics such as the min, max, avg, count, and sum.
extended_stats: Computes the stats metric plus variance, standard deviation (std_deviation), bounds of standard deviation (std_deviation_bounds), and sum of squares (sum_of_squares).
percentiles: Computes the percentiles (the point at which a certain percentage of observed values occur) of some values (see Wikipedia at http://en.wikipedia.org/wiki/Percentile for more information about percentiles).
percentile_ranks: Computes the rank of values that hit a percentile range.
cardinality: Computes an approximate count of distinct values in a field.
geo_bounds: Computes the maximum geobounds in the document where the GeoPoints are.
geo_centroid: Computes the centroid in the document where GeoPoints are.

按值聚合

GET /aweme-info-dev/_search
{
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "size": 0,
  "aggs": {
    "aweme_id_agg": {
      "terms": {
        "field": "author_id",
        "size": 100   // buckets are ordered by doc_count descending by default; keep the top 100 author_id values
      },
      // Sub-aggregations are computed separately inside each author_id bucket.
      "aggs": {
        "total_digg_count": {
          "sum": {
            "field": "digg_count_di"
          }
        },
        "avg_digg_count": {
          "avg": {
            "field": "digg_count_di"
          }
        },
        "agg_digg_count": {
          "terms": {
            "field": "digg_count_di"
          }
        }
      }
    }
  }
}

修改terms的排序方式：“order”: {“_count”: “asc”}
指定terms中doc_count的最小数量：“min_doc_count”: 3
指定terms中需要统计的值：“include”: [“BMW”, “Audi”]

GET /aweme-info-dev/_search
{
  "size": 0,  
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "aggs": {
    "最小": {
      "min": {
        "field": "digg_count"
      }
    },
    "最大": {
      "max": {
        "field": "digg_count"
      }
    },
    "平均值": {
      "avg": {
        "field": "digg_count"
      }
    },
    "求和": {
      "sum": {
        "field": "digg_count"
      }
    },
    "去重count": {
      "cardinality": {
        "field": "author_id"
      }
    },
    "分组去重": {
      "terms": {
        "field": "aweme_id",
        "size": 10
      },
      "aggs": {
        "组内去重": {
          "cardinality": {
            "field": "author_id"
          }
        },
        "组内求和": {
          "sum": {
            "field": "digg_count"
          }
        }
      }
    }
  }
}

按值范围聚合

GET /author-info-dev/_search
{
  "track_total_hits": true,
  "size": 0,
  "aggs": {
    "digg_distribute": {
      "range": {
        "field": "comment_count",
        // In a range aggregation "from" is inclusive and "to" is exclusive, so adjacent
        // buckets must share their boundary value; otherwise 500 and 1000 fall into no bucket.
        // Buckets: <= 500, 501..1000, >= 1001 (for integer values).
        "ranges": [
          {"to": 501},
          {"from": 501, "to": 1001},
          {"from": 1001}
        ]
      }
    }
  }
}

// Mapping types were removed from the search URL in current Elasticsearch versions,
// so the request targets the index only: /cars/_search instead of /cars/cars/_search.
GET /cars/_search
{
  "aggs": {
    "range": {
      "date_range": {
        "field": "sellTime",
        // "format" controls how the from/to values below are parsed and how bucket keys are rendered.
        "format": "yyyy",
        "ranges": [
          {
            "from": "2014",
            "to": "2019"
          }
        ]
      }
    }
  }
}

直方图聚合（按值间隔聚合）

// Typeless search endpoint: mapping types are no longer part of the URL.
GET /cars/_search
{
  "aggs": {
    "prices": {
      // One bucket per 10000-wide price interval.
      "histogram": {
        "field": "price",
        "interval": 10000
      }
    }
  }
}

// Typeless search endpoint: mapping types are no longer part of the URL.
GET /cars/_search
{
  "aggs": {
    "sales_over_time": {
      "date_histogram": {
        "field": "sellTime",
        // "interval" was deprecated in 7.2 and removed in 8.0; calendar-aware units such as
        // month go in "calendar_interval", fixed durations (e.g. 30d) in "fixed_interval".
        "calendar_interval": "month",
        "format": "yyyy-MM-dd"
      }
    }
  }
}

GET /fans-follow-pro/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "crawl_time": {
            "gte": "2021-04-12 00:00:00",
            "lte": "2021-04-21 00:00:00"
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "range": {
      "date_histogram": {
        "field": "crawl_time",
        "format": "yyyy-MM-dd",
        // "interval" was deprecated in 7.2 and removed in 8.0; use "calendar_interval" for calendar units.
        "calendar_interval": "day"
      }
    }
  }
}

分组排序取第一

GET /aweme-info-dev/_search
{
  "query": {
    "range": {
      "ctime": {
        "gte": "2021-04-10 00:00:00",
        "lte": "2021-04-10 23:59:59"
      }
    }
  },
  "size": 0,
  "aggs": {
    "aweme_id_agg": {
      "terms": {
        "field": "author_id",
        "order": {
          "_count": "asc"
        },
        "size": 10
      },
      "aggs": {
        "NAME": {
          // Returns a single document per author_id bucket: the one with the latest publish_time.
          "top_hits": {
            "size": 1,
            "sort": {"publish_time": "desc"},  // newest publish_time first
            "_source": ["publish_time", "ctime"]
          }
        }
      }
    }
  }
}


GET /dongchedi-live-info-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-08-19 00:00:00",
        "gte": "2021-08-18 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "terms": {
        "field": "room_id",
        "size": 10,
        // Sort buckets by their key; "_term" was replaced by "_key" (deprecated in 6.0, removed in 7.0).
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "NAME": {
          // Returns a single document per room_id bucket: the most recently crawled one.
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "crawl_time": {
                  "order": "desc"
                }
              }
            ],
            "_source": ["crawl_time", "room_id", "room_status", "finish_time"]
          }
        }
      }
    }
  },
  "size": 0
}

Group by + Distinct + Count

GET /dongchedi-live-polling-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-08-18 00:00:00",
        "gte": "2021-08-01 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "date_histogram": {
        "field": "crawl_time",
        // "interval" was deprecated in 7.2 and removed in 8.0; use "calendar_interval" for calendar units.
        "calendar_interval": "day"
      },
      "aggs": {
        // Named distinct_msg_count rather than doc_count: every bucket already carries a built-in
        // "doc_count" field, and a sub-aggregation with the same name yields a duplicate key in the response.
        "distinct_msg_count": {
          "cardinality": {
            "field": "msg_id"
          }
        }
      }
    }
  }
}


GET /dongchedi-live-polling-afanti-pro/_search
{
  "query": {
    "range": {
      "crawl_time": {
        "lte": "2021-09-23 00:00:00",
        "gte": "2021-08-18 00:00:00"
      }
    }
  },
  "aggs": {
    "NAME": {
      "date_histogram": {
        "field": "crawl_time",
        // "interval" was deprecated in 7.2 and removed in 8.0; use "calendar_interval" for calendar units.
        "calendar_interval": "day"
      },
      "aggs": {
        "abc": {
          "cardinality": {
            // Counts below precision_threshold are expected to be accurate; above it the
            // result may be approximate. The maximum supported value is 40000.
            "precision_threshold": 40000,
            // Distinct count over the (room_id, msg_id) pair, built by concatenating both values.
            "script": {
              "source": "doc['room_id'].value + ' ' + doc['msg_id'].value"
            }
          }
        }
      }
    }
  }
}

Group by + Having+Count + Distinct

GET /aweme-info-pro/_search
{
  "size": 0,
  "aggs": {
    "NAME": {
      "terms": {
        "field": "item_id",
        "size": 200,
        "order": {
          "amount": "desc"
        }
      },
      "aggs": {
        "amount": {
          "cardinality": {
            "field": "author_id"
          }
        },
        "having": {
          "bucket_selector": {
            "buckets_path": {
              "amount": "amount"
            },
            "script": {
              "source": "params.amount >= 2 "
            }
          }
        }
      }
    }
  }
}

Collapse实现Distinct

GET /author-info-dev/_search
{
  "query": {
    "term": {
      "author_id": "96660564486"
    }
  },
  "collapse": {
    "field": "author_id"
  }
}

通配符wildcard和模糊匹配fuzzy
wildcard和fuzzy在standard分词器都无法匹配多个汉字

GET /author-info-dev/_search
{
  "query": {
    "bool": {
      "filter": {
        "wildcard": {
          "nickname": {
            "value": "姐"
          }
        }
      }
    }
  },
  "_source": "nickname"
}

GET /author-info-dev/_search
{
  "query": {
    "bool": {
      "filter": {
        "wildcard": {
          "nickname": {
            "value": "*姐*"
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "distinct_nickname": {
      "terms": {
        "field": "city_id",
        "size": 100
      }
    }
  }
}

GET /author-info-dev/_search
{
  "query": {
    "fuzzy": {
      "signature":"老司机"
    }
  }
}

Cannot search on field [signature] since it is not indexed.

前缀查询Prefix
prefix在standard分词器也无法匹配多个汉字

GET /author-info-dev/_search
{
  "query": {
    "prefix": {
      "author_id": {
        "value": "2"
      }
    }
  },
  "_source": "author_id"
}

DataCareer

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
ElasticSearch Index查询（Query DSL）

ES 查询快速入门
复制链接

扫一扫

专栏目录