Elasticsearch查询语法+评分调整

最新推荐文章于 2023-12-18 16:08:34 发布

后青春的晴诗

最新推荐文章于 2023-12-18 16:08:34 发布

阅读量676

点赞数

分类专栏： Elasticsearch 文章标签： es 搜索引擎

本文链接：https://blog.csdn.net/qq_43530269/article/details/103860062

版权

Elasticsearch 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

Query DSL语法

match查询

# 按照字段上定义的分词分析后去索引内查询
GET /movie/_search
{
  "query":{
    "match":{"title":"steve"}
  }
}

term查询

# 不对搜索词做分词分析，直接去索引查询，即搜索关键词必须与索引内的词项精确匹配（因此对经过分词的text字段用"steve zissou"整体查询通常无法命中）
GET /movie/_search
{
  "query":{
    "term":{"title":"steve zissou"}
  }
}

match分词后的and和or

GET /movie/_search
{
  "query":{
    "match":{"title":"basketball with cartoom aliens"}
  }
}
# 使用的是or（只要命中任一分词即可出结果）

GET /movie/_search
{
  "query":{
    "match": {
      "title": {
        "query": "basketball with cartoom aliens",
        "operator": "and" 
      }
    }
  } 
}
# 使用and（使分词全部命中才可以出结果）

最小词项匹配

# 最少命中两个分词才可以出结果
GET /movie/_search
{
  "query":{
    "match": {
      "title": {
        "query": "basketball with cartoom aliens",
        "operator": "or" ,
        "minimum_should_match": 2
      }
    }
  }
}

短语查询

# 查询词会先分词，但要求所有分词按相同顺序且相邻出现才命中
GET /movie/_search
{
  "query":{
    "match_phrase":{"title":"steve zissou"}
  }
}

# 短语前缀查询
GET /movie/_search
{
  "query":{
    "match_phrase_prefix":{"title":"steve zis"}
  }
}

多字段查询

# 在title和overview都去查询（or）
GET /movie/_search
{
  "query":{
    "multi_match":{
      "query":"basketball with cartoom aliens",
      "fields":["title","overview"]
    }
  }
}

评分规则(tf/idf)*tfnorm：

tf：词频 这个document文档包含了多少个这个词，包含越多表明越相关
idf：逆文档频率 与包含该词的文档总数成反比，包含该词的文档越少，该词越稀有，权重越高
tfnorm: 根据field长度做归一化，文档内出现频率越高，field越短越相关

# 不管operator是and还是or，都是按照逻辑关系命中后，将各分词的得分相加

GET /movie/_search
{
  "explain": true, 
  "query":{
    "match":{"title":"steve"}
  }
}
# 查看数值，tfidf多少分，tfnorm归一化后多少分

# 多字段查询时，query分词后可能在多个字段命中；因为title命中比overview命中更重要，因此需要给title加权重（title^10）

GET /movie/_search
{
  "query":{
    "multi_match":{
      "query":"basketball with cartoom aliens",
      "fields":["title^10","overview"],
      "tie_breaker":0.3
    }
  }
}

Bool查询

must：必须都是true
must_not：必须都是false
should：其中有一个为true即可，但true的越多得分越高

GET /movie/_search
{
  "query":{
    "bool": { 
      "should": [
        { "match": { "title":"basketball with cartoom aliens"}},
        { "match": { "overview":"basketball with cartoom aliens"}}  
      ]
    }
  }
}

multi_match的type不同，得分的计算方式也不一样

①best_fields:默认，取得分最高的作为对应的分数，最匹配模式,等同于dismax模式

# 以字段为单位分别计算分词的分数，然后取最好的一个,适用于最优字段匹配
GET /movie/_search
{
  "query":{
    "multi_match":{
      "query":"basketball with cartoom aliens",
      "fields":["title^10","overview"],
      "type":"best_fields"
    }
  }
}

# 使用explain看下 ((title:steve title:job) | (overview:steve overview:job))，打分规则
GET /movie/_validate/query?explain
{
  //"explain": true, 
  "query":{
    "multi_match":{
      "query":"steve job",
      "fields":["title^10","overview"],
      "operator": "or",
      "type":"best_fields"
    }
  }
}

# 将其他因素以0.3的倍数考虑进去
# dismax模式 = best_fields模式
GET /movie/_search
{
  "query":{
    "dis_max": { 
      "queries": [
        { "match": { "title":"basketball with cartoom aliens"}}, 
        { "match": { "overview":"basketball with cartoom aliens"}}  
      ],
      "tie_breaker": 0.3
    }
  }
}

②most_fields:取命中的分值相加作为分数，同should match模式，加权共同影响模式

# 以字段为单位分别计算分词的分数，然后加在一起，适用于都有影响的匹配
GET /movie/_search
{
  "query":{
    "multi_match":{
      "query":"basketball with cartoom aliens",
      "fields":["title^10","overview^0.1"],
      "type":"most_fields"
    }
  }
}

# 使用explain看下 ((title:steve title:job) | (overview:steve overview:job))~1.0，打分规则
GET /movie/_validate/query?explain
{
  //"explain": true, 
  "query":{
    "multi_match":{
      "query":"steve job",
      "fields":["title","overview"],
      "operator": "or",
      "type":"most_fields"
    }
  }
}

③cross_fields:以分词为单位计算栏位总分，词的权重较高

# 以词为单位，分别用词去不同的字段内取内容，拿高的分数后与其他词的分数相加，适用于词导向的匹配
GET /movie/_search
{
  "query":{
    "multi_match":{
      "query":"steve job",
      "fields":["title","overview"],
      "type":"cross_fields"
    }
  }
}

# 使用explain看下 blended(terms:[title:steve, overview:steve]) blended(terms:[title:job, overview:job])，打分规则
GET /movie/_validate/query?explain
{
  //"explain": true, 
  "query":{
    "multi_match":{
      "query":"steve job",
      "fields":["title","overview"],
      "operator": "or",
      "type":"cross_fields"
    }
  }
}

GET /forum/article/_search
{
  "query": {
    "multi_match": {
      "query": "Peter Smith",
      "type": "cross_fields", 
      "operator": "and",
      "fields": ["author_first_name", "author_last_name"]
    }
  }
}
# 要求Peter必须在author_first_name或author_last_name中出现
# 要求Smith必须在author_first_name或author_last_name中出现
# 如果使用most_fields，像Smith Williams这样的结果也可能会出现，因为most_fields只要求任何一个field匹配了就可以，匹配的field越多，分数越高

query string：可以方便地使用 AND(+)、OR(|)、NOT(-) 组合查询条件

GET /movie/_search
{
  "query":{
    "query_string":{
      "fields":["title"],
      "query":"steve OR jobs"
     
    }
  }
}

filter单条件过滤查询

# 相当于数据库的where语句
GET /movie/_search
{
  "query":{
    "bool":{
      "filter":{
          "term":{"title":"steve"}
      }
    }
  }
}

多条件过滤查询

# lte小于等于，gte大于等于
GET /movie/_search
{
  "query":{
    "bool":{
      "filter":[
        {"term":{"title":"steve"}},
        {"term":{"cast.name":"gaspard"}},
        {"range": {"release_date":{"lte":"2015/01/01"}}},
        {"range": {"popularity":{"gte":"25"}}}
        ]
    }
  },
  "sort":[
    {"popularity":{"order":"desc"}}
  ]
}

带match打分的filter过滤查询

# must必须命中
GET /movie/_search
{
  "query":{
    "bool":{
      "must":[
        {"match":{"title":"Search"}}, 
        {"match": {"tagline":"Elasticsearch"}}  
      ],
      "filter":[
        {"term":{"title":"steve"}},
        {"term":{"cast.name":"gaspard"}},
        {"range":{"release_date":{"lte": "2015/01/01"}}},
        {"range":{"popularity":{"gte": "25"}}}
        ]
    }
  }
}

# filter控制条件过滤 should控制打分参数
GET /movie/_search
{
  "query":{
    "bool":{
      "should": [
        {"match":{"title":"Search"}}, 
        {"match":{"tagline":"Elasticsearch"}}  
      ],
      "filter":[
        {"term":{"title":"steve"}},
        {"term":{"cast.name":"gaspard"}},
        {"range":{"release_date":{"lte":"2015/01/01"}}},
        {"range":{"popularity": {"gte":"25"}}}
        ]
    }
  }
}

优秀的搜索引擎必备：

查全率：正确的结果有n个，查询出来正确的有m 则 m/n
查准率：查出的n个文档有m个正确，则m/n
两者都需要提高，但一般不可兼得，可以通过调整排序位置，将正确的结果排在上面以提高用户体验

# function score自定义打分
GET /movie/_search
{
  "query":{
    "function_score": {
      //原始查询得到oldscore
      "query": {      
        "multi_match":{
        "query":"steve job",
        "fields":["title","overview"],
        "operator": "or",
        "type":"most_fields"
      }
    },
    "functions": [
      {"field_value_factor": {
          "field": "popularity",   //对应要处理的字段
          "modifier": "log2p",    //将字段值+2后，计算对数
          "factor": 10    //字段预处理*10
        }
      }
    ], 

    "score_mode": "sum",   //不同的field value之间的得分相加
    "boost_mode": "sum"    //最后再与oldscore相加
  }
}
}

后青春的晴诗

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Elasticsearch查询语法+评分调整

Query DSL语法match查询# 按照字段上定义的分词分析后去索引内查询GET /movie/_search{ "query":{ "match":{"title":"steve"} }}term查询# 不进行词的分析，直接去索引查询，及搜索关键词和索引内词的精确匹配GET /movie/_search{ "query":{ "term":{"...
复制链接

扫一扫