Elasticsearch filter过滤查询

最新推荐文章于 2024-07-29 17:57:54 发布

lebron3v

最新推荐文章于 2024-07-29 17:57:54 发布

阅读量2.5w

点赞数 2

分类专栏： ElasticSearch 文章标签： Elasticsearch

本文链接：https://blog.csdn.net/lebron3v/article/details/84030836

版权

ElasticSearch 专栏收录该内容

4 篇文章 1 订阅

订阅专栏

注：本实例代码是在ES marvel插件（Sense控制台）中执行，使用的是 Elasticsearch 1.x 的语法（filtered 查询以及 and/or/not、missing 过滤器在 2.x 中已被废弃，并在 5.0 中移除）。

# ---------------------------------------
# filter 查询 

# Seed data set 1: bulk-index four products (ids 1-4) into store/products.
# A bulk body is newline-delimited: each action line is followed by the
# document it applies to, and every JSON object must stay on a single line.
POST /store/products/_bulk
{"index": {"_id": 1}}
{"price": 10, "productID": "SD1002136"}
{"index": {"_id": 2}}
{"price": 20, "productID": "SD2678421"}
{"index": {"_id": 3}}
{"price": 30, "productID": "SD8897573"}
{"index": {"_id": 4}}
{"price": 40, "productID": "SD4535233"}



# Read the seeded documents back by id with a single multi-get request.
GET /store/products/_mget
{ "ids": ["1", "2", "3", "4"] }

# Show the mapping of the store index (the index the test data was written to).
GET /store/_mapping


# ---------------------------------------
# 简单过滤查询

# Simplest filter query.
# SQL equivalent: SELECT document FROM products WHERE price = 20
# The filtered query runs match_all and then keeps only the documents whose
# price equals 20. The seed data holds prices 10/20/30/40, so the value must
# be 20 (not 200) for the query to return the product described here.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "term": { "price": 20 }
      }
    }
  }
}


# Several values can be given at once: the terms filter matches a document
# when its price equals any value in the list.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "terms": { "price": [10, 20] }
      }
    }
  }
}


# SQL equivalent: SELECT product FROM products WHERE productID = "SD4535233"
# productID went through the default analyzer when it was indexed, which
# turns upper case into lower case, so the term has to be given in lower case.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "term": { "productID": "sd4535233" }
      }
    }
  }
}

# Show the tokens the analyzer produces for this product ID.
GET /_analyze?text=SD4535233

# Show the current mapping of the store index.
GET /store/_mapping

# Delete the store index (and every document in it) so it can be recreated
# with an explicit mapping.
DELETE /store

# Recreate the index with an explicit mapping that marks productID as
# not_analyzed, so the value is indexed exactly as written.
PUT /store
{
  "mappings": {
    "products": {
      "properties": {
        "productID": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}

# Re-index the test data: deleting the store index removed the documents too,
# so without this step the search below would return no hits.
POST /store/products/_bulk
{ "index": {"_id": 1}}
{ "price": 10, "productID": "SD1002136"}
{ "index": {"_id": 2}}
{ "price": 20, "productID": "SD2678421"}
{ "index": {"_id": 3}}
{ "price": 30, "productID": "SD8897573"}
{ "index": {"_id": 4}}
{ "price": 40, "productID": "SD4535233"}

# With the new mapping and the data back in place, search for the exact
# (upper-case) product ID.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "term": {
          "productID": "SD4535233"
        }
      }
    }
  }
}



# -------------------------
# bool过滤查询，可以做组合过滤查询


# SQL equivalent:
#   SELECT product FROM products
#   WHERE (price = 20 OR productID = "SD1002136") AND (price != 30)
# i.e. products that cost 20 or have productID SD1002136, excluding any
# product that costs 30.
#
# Elasticsearch expresses and / or / not combinations with a bool filter:
# {
#   "bool" : {
#     "must" :      [],
#     "should" :    [],
#     "must_not" :  []
#   }
# }
#
# must:     the clauses have to match, like AND
# should:   a clause may or may not match, like OR
# must_not: the clauses have to NOT match, like NOT

GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "price": "20" } },
            { "term": { "productID": "SD1002136" } }
          ],
          "must_not": [
            { "term": { "price": "30" } }
          ]
        }
      }
    }
  }
}


# Nested bool filter.
# SQL equivalent:
#   SELECT document FROM products
#   WHERE productID = "SD1002136"
#      OR (productID = "SD4535233" AND price = 30)
# The inner bool/must pair plays the role of the parenthesised AND.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "productID": "SD1002136" } },
            {
              "bool": {
                "must": [
                  { "term": { "productID": "SD4535233" } },
                  { "term": { "price": "30" } }
                ]
              }
            }
          ]
        }
      }
    }
  }
}



# Alternative and / or / not syntax: the and, or and not filters are used
# directly, without wrapping the clauses in bool.
#
# and: products whose price is 10 and whose productID is SD1002136.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "and": [
          { "term": { "price": 10 } },
          { "term": { "productID": "SD1002136" } }
        ]
      }
    }
  }
}


# or: products whose price is 10 or whose productID is SD4535233.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "or": [
          { "term": { "price": 10 } },
          { "term": { "productID": "SD4535233" } }
        ]
      }
    }
  }
}


# not: products whose productID is anything other than SD1002136.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "not": {
          "term": { "productID": "SD1002136" }
        }
      }
    }
  }
}


# Range filter.
# SQL equivalent: SELECT document FROM products WHERE price BETWEEN 20 AND 40
# Available bounds:
#   gt  : >   greater than
#   lt  : <   less than
#   gte : >=  greater than or equal to
#   lte : <=  less than or equal to
# BETWEEN includes both end points, so the inclusive bounds gte/lte are used;
# gt/lt would leave out the products priced exactly 20 and 40.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "range": {
          "price": {
            "gte": 20,
            "lte": 40
          }
        }
      }
    }
  }
}





# -------------------------
# 过滤空和非空

# Seed data set 2: five documents in test_index/test covering the cases used
# by the null-handling queries: tags with values, no tags field at all,
# tags set to null, and a tags array that contains a null entry.
POST /test_index/test/_bulk
{"index": {"_id": "1"}}
{"tags": ["search"]}
{"index": {"_id": "2"}}
{"tags": ["search", "open_source"]}
{"index": {"_id": "3"}}
{"other_field": ["some data"]}
{"index": {"_id": "4"}}
{"tags": null}
{"index": {"_id": "5"}}
{"tags": ["search", null]}


# Handling null values.

# SQL equivalent: SELECT tags FROM test WHERE tags IS NOT NULL
# The exists filter selects on the tags field being present.
GET /test_index/test/_search
{
  "query": {
    "filtered": {
      "filter": {
        "exists": { "field": "tags" }
      }
    }
  }
}


# SQL equivalent: SELECT tags FROM test WHERE tags IS NULL
# The missing filter is the counterpart of exists for the same field.
GET /test_index/test/_search
{
  "query": {
    "filtered": {
      "filter": {
        "missing": { "field": "tags" }
      }
    }
  }
}

cache缓存

Elasticsearch在执行带有filter查询时，会打开索引的每个segment文件（Lucene式底层文件），然后去判断里面的文档是否符合filter要求。
注意：旧的segment文件不会变，新来的数据会产生新的segment。

匹配的结果会用一个大型的BitSet（位集）来存储，其中每一位的值只有0和1
匹配：1
不匹配：0
BitSet是存在内存里的，而不是硬盘里，所以速度快！

开启方式：在filter查询语句后面加"_cache": true

注意：
Script filters，Geo filters，Date ranges这样的过滤方式开启cache无意义
exists，missing，range，term和terms查询是默认开启cache的

如果想要开启cache缓存，只需要在filter中添加"_cache": true属性就可以，例如：

# Caching a filter: "_cache": true on the bool filter turns caching on for it.
# The clauses are: price 20 or productID SD1002136, excluding price 30.
GET /store/products/_search
{
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "price": "20" } },
            { "term": { "productID": "SD1002136" } }
          ],
          "must_not": [
            { "term": { "price": "30" } }
          ],
          "_cache": true
        }
      }
    }
  }
}