ElasticSearch7高端进阶【分布式搜索引擎进阶学习】查询语句进阶

8-9 查询语句进阶(5)_慕课网 tf idf 算分相关的详细计算

 多字段查询

ElasticSearch 评分分析:explain 解释和一些查询理解

// 使用analyze api 查看分词状态
# Run the analyzer configured for the "name" field over the sample text
# and return the tokens it produces.
GET /movie/_analyze
{
  "field": "name",
  "text": "Eating an apple a day & keeps the doctors away"
}

//  使用结构化的方式重新创建索引
# Create the "movie" index with explicit settings and mappings:
# "name" is a text field analyzed with the english analyzer, "age" is an integer.
PUT /movie
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "english"
      },
      "age": {
        "type": "integer"
      }
    }
  }
}

// 玩转tmdb 索引建立:

# Create the TMDB "movie" index. Free-text fields use the english analyzer;
# the two cast sub-fields use the standard analyzer.
# NOTE(review): an earlier request in these notes also creates /movie; presumably
# that index has to be deleted before this PUT can succeed — confirm.
PUT /movie
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "english"
      },
      "tagline": {
        "type": "text",
        "analyzer": "english"
      },
      "release_date": {
        "type": "date",
        "format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"
      },
      "popularity": {
        "type": "double"
      },
      "overview": {
        "type": "text",
        "analyzer": "english"
      },
      "cast": {
        "type": "object",
        "properties": {
          "character": {
            "type": "text",
            "analyzer": "standard"
          },
          "name": {
            "type": "text",
            "analyzer": "standard"
          }
        }
      }
    }
  }
}
Query DSL 简单实验
1. match 查询,按照字段上定义的分词分析后去索引内查询
2. term 查询,不进行词的分析,直接去索引查询,即搜索关键词和索引内词的精确匹配
# match query: the search text is analyzed with the field's analyzer before lookup.
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "query": "steve zissou"
      }
    }
  }
}

# term query: the search text is NOT analyzed; it is looked up in the index as-is.
GET /movie/_search
{
  "query": {
    "term": {
      "title": {
        "value": "steve zissou"
      }
    }
  }
}

// 多字段查询

# multi_match: one query text against several fields; "title^10" gives title a boost of 10.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "fields": [
        "title^10",
        "overview"
      ],
      "query": "basketball with cartoom aliens"
    }
  }
}

# bool/must: both match clauses have to hit, i.e. the text must match title AND overview.
GET /movie/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "basketball with cartoom aliens" } },
        { "match": { "overview": "basketball with cartoom aliens" } }
      ]
    }
  }
}

// term 查询不进行分词的分析,直接去索引内查询

# term query on title with the whole, un-analyzed string "steve jobs".
GET /movie/_search
{
  "query": {
    "term": {
      "title": "steve jobs"
    }
  }
}

# 分词后的 and 和 or 逻辑:match 默认是 or
不管运算符是 and(与)还是 or(或),各分词按照逻辑关系命中后,得分相加

# match with no explicit operator (the notes above state the default is "or").
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "query": "basketball with cartoom aliens"
      }
    }
  }
}

# 改成and 

# Same match query, but every analyzed term is required ("operator": "and").
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "operator": "and",
        "query": "basketball with cartoom aliens"
      }
    }
  }
}
查看数值:tf-idf 多少分,tfNorm 归一化后多少分
多字段查询:索引内有 query 分词后的结果;因为 title 比 overview 命中更重要,因此需要给 title 加权重

 

# multi_match with title boosted by 10. "tie_breaker": 0.3 adds 30% of the score
# of the other matching fields on top of the best field's score.
# (The parameter name is "tie_breaker"; "tie_break" is not accepted by multi_match.)
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball with cartoom aliens",
      "fields": [
        "title^10",
        "overview"
      ],
      "tie_breaker": 0.3
    }
  }
}

 

# 最小词匹配项 minimum_should_match 对应的分词至少命中2个

# "or" match where at least 2 of the analyzed terms have to hit.
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "minimum_should_match": 2,
        "operator": "or",
        "query": "basketball with cartoom aliens"
      }
    }
  }
}

 # 短语查询

# match_phrase: phrase query for "steve zissou" on title.
GET /movie2/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "steve zissou"
      }
    }
  }
}

 # 多字段查询

# multi_match over title and overview; "explain": true returns the score breakdown per hit.
GET /movie2/_search
{
  "query": {
    "multi_match": {
      "fields": [
        "title",
        "overview"
      ],
      "query": "basketbal with cartoom aliens"
    }
  },
  "explain": true
}

# 优化多字段查询

# Tuned multi-field query: title boosted by 10, explicit type best_fields, with score explanation.
GET /movie2/_search
{
  "query": {
    "multi_match": {
      "type": "best_fields",
      "fields": [
        "title^10",
        "overview"
      ],
      "query": "basketball with cartoom aliens"
    }
  },
  "explain": true
}

 # multi_match 使用不同的 type 时,得分不一样

# dis_max over two match clauses (title, overview), written out by hand.
GET /movie2/_search
{
  "query": {
    "dis_max": {
      "queries": [
        {
          "match": {
            "title": "basketball with cartoom aliens"
          }
        },
        {
          "match": {
            "overview": "basketball with cartoom aliens"
          }
        }
      ]
    }
  }
}

# Validate the best_fields multi_match and ask for an explanation of how it is interpreted.
GET /movie2/_validate/query?explain
{
  "query": {
    "multi_match": {
      "type": "best_fields",
      "fields": [
        "title^10",
        "overview"
      ],
      "query": "basketball with cartoom aliens"
    }
  }
}

 #most_fields 考虑绝大多数(所有的)文档的字段得分相加,获得我们想要的结果

# most_fields: scores from the matching fields are added together.
GET /movie2/_search
{
  "query": {
    "multi_match": {
      "type": "most_fields",
      "fields": [
        "title",
        "overview"
      ],
      "query": "basketball with cartoom aliens"
    }
  }
}
继续深入查询:
1. Bool 查询
must :必须都是 true
must_not : 必须都是 false
should :其中有一个为 true 即可,但 true的越多得分越高
# bool/should: a document matches if either clause hits; hitting both scores higher.
# Each clause is its own object in the array — a single JSON object cannot carry
# two "match" keys.
GET /movie2/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": "basketball with cartoom aliens"
          }
        },
        {
          "match": {
            "overview": "basketball with cartoom aliens"
          }
        }
      ]
    }
  }
}
2. multi_match 的 type 不同,得分也不一样
因为 multi_match 有很多种 type
best_fields: 默认,取得分最高的作为对应的分数,最匹配模式 , 等同于 dismax 模式
# best_fields (described in the notes as the default type): the best-scoring field decides the score.
GET /movie2/_search
{
  "query": {
    "multi_match": {
      "type": "best_fields",
      "query": "basketball with cartoom aliens",
      "fields": [
        "title^10",
        "overview"
      ]
    }
  },
  "explain": true
}

#cross_fields : 以分词为单位计算各栏位的总分,适用于词导向的匹配

# cross_fields multi_match for "steve job" over title and overview, with score explanation.
GET /movie2/_search
{
  "query": {
    "multi_match": {
      "type": "cross_fields",
      "fields": [
        "title",
        "overview"
      ],
      "query": "steve job"
    }
  },
  "explain": true
}
# Validate the cross_fields query and ask for an explanation of how it is interpreted.
GET /movie2/_validate/query?explain
{
  "query": {
    "multi_match": {
      "type": "cross_fields",
      "fields": [
        "title",
        "overview"
      ],
      "query": "steve job"
    }
  }
}

#query string
# 方便地利用 AND、OR、NOT(运算符必须大写)

 
# query_string: boolean operators are only recognised in upper case (AND / OR / NOT);
# a lower-case "or" would be handled as an ordinary search term instead.
GET /movie2/_search
{
  "query": {
    "query_string": {
      "fields": ["title"],
      "query": "steve OR jobs"
    }
  }
}
过滤查询
filter过滤查询
单条件过滤
# Single-condition filter: keep only documents whose title contains the term "steve".
GET /movie2/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "title": {
            "value": "steve"
          }
        }
      }
    }
  }
}

#多条件过滤

# Several filter conditions at once; hits are ordered by popularity, highest first.
GET /movie2/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "title": "steve" } },
        { "term": { "cast.name": "gaspard" } },
        { "range": { "release_date": { "lte": "2015/01/01" } } },
        { "range": { "popularity": { "gte": "15" } } }
      ]
    }
  },
  "sort": [
    { "popularity": { "order": "desc" } }
  ]
}

# 带 match 打分的 filter:should 和 filter 组合使用,filter 控制条件过滤,should 控制打分

# Same four filter conditions, plus a should/match clause on title ("life").
GET /movie2/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "title": "steve" } },
        { "term": { "cast.name": "gaspard" } },
        { "range": { "release_date": { "lte": "2015/01/01" } } },
        { "range": { "popularity": { "gte": "15" } } }
      ],
      "should": [
        { "match": { "title": "life" } }
      ]
    }
  }
}

#functionscore
 

# function_score: adjust the relevance score using each document's "popularity" value.
GET /movie2/_search
{
  "explain": true,
  "query": {
    "function_score": {
      // base query; it produces the original score
      "query": {
        "multi_match": {
          "query": "steve job",
          "fields": ["title", "overview"],
          "operator": "or",
          "type": "most_fields"
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "popularity", // popularity: the field whose value is used for the adjustment
            "modifier": "log2p",
            "factor": 10
          }
        },
        {
          "field_value_factor": {
            "field": "popularity", // popularity: the field whose value is used for the adjustment
            "modifier": "log2p",
            "factor": 5
          }
        }
      ],
      "score_mode": "sum", // add the results of the individual functions together
      "boost_mode": "sum" // then add that total to the original query score
    }
  }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值