Query DSL语法
match查询
# Match query: the search text is run through the analyzer defined on the
# field, and the resulting terms are then looked up in the index.
GET /movie/_search
{
  "query": {
    "match": {
      "title": "steve"
    }
  }
}
term查询
# Term query: the search text is NOT analyzed; it is looked up as-is, i.e. it
# must exactly match a term stored in the index.
# NOTE(review): if `title` is an analyzed text field, the two-word value below
# cannot equal any single indexed term — confirm that is the intended demo.
GET /movie/_search
{
  "query": {
    "term": {
      "title": "steve zissou"
    }
  }
}
match分词后的and和or
# Match query with the default operator ("or"): a document is returned as soon
# as any one of the analyzed terms matches.
GET /movie/_search
{
  "query": {
    "match": {
      "title": "basketball with cartoom aliens"
    }
  }
}
# Match query with operator "and": every analyzed term has to match before a
# document is returned.
# NOTE(review): "cartoom" looks like a typo of "cartoon"; with "and" a term
# that is absent from the index yields no hits at all — confirm the spelling.
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "operator": "and",
        "query": "basketball with cartoom aliens"
      }
    }
  }
}
最小词项匹配
# Minimum-should-match: at least two of the analyzed terms must match before a
# document is returned.
GET /movie/_search
{
  "query": {
    "match": {
      "title": {
        "minimum_should_match": 2,
        "operator": "or",
        "query": "basketball with cartoom aliens"
      }
    }
  }
}
短语查询
# Phrase query: the search text has to match as one whole phrase (all terms,
# adjacent and in the same order) instead of term by term.
GET /movie/_search
{
  "query": {
    "match_phrase": {
      "title": "steve zissou"
    }
  }
}
# Phrase-prefix query: a phrase query whose last term ("zis") is treated as a
# prefix.
GET /movie/_search
{
  "query": {
    "match_phrase_prefix": {
      "title": "steve zis"
    }
  }
}
多字段查询
# Multi-field query: the text is searched in both title and overview (a hit in
# either field is enough).
# The list of target fields must be given under "fields" (plural); "field" is
# not a multi_match parameter.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball with cartoom aliens",
      "fields": ["title", "overview"]
    }
  }
}
评分规则(tf/idf)*tfnorm:
tf:词频 这个document文档包含了多少个这个词,包含越多表明越相关
idf:逆文档频率 由包含该词的文档总数目计算得出,包含该词的文档越少,idf越高,该词越有区分度
tfnorm: 根据field长度做归一化,文档内出现频率越高,field越短越相关
# 操作不管是字符与还是或,按照逻辑关系命中后相加得分
# Run the match query with "explain": true and inspect the reported values:
# how much the tf/idf part scores and how much the normalised tfNorm scores.
GET /movie/_search
{
  "explain": true,
  "query": {
    "match": {
      "title": "steve"
    }
  }
}
# Multi-field query where a hit in title matters more than a hit in overview,
# so title is boosted by a factor of 10 (title^10).
# tie_breaker (not "tie_break") lets the other matching fields contribute
# 0.3 x their score on top of the best field's score.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball with cartoom aliens",
      "fields": ["title^10", "overview"],
      "tie_breaker": 0.3
    }
  }
}
Bool查询
must:必须都是true
must_not: 必须都是false
should:其中有一个为true即可,但true的越多得分越高
# Bool query with two should clauses: the same text is matched against title
# and against overview.
GET /movie/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": "basketball with cartoom aliens"
          }
        },
        {
          "match": {
            "overview": "basketball with cartoom aliens"
          }
        }
      ]
    }
  }
}
multi_match的type不同,打分方式和得分也不一样
①best_fields:默认,取得分最高的作为对应的分数,最匹配模式,等同于dismax模式
# best_fields: the analyzed terms are scored per field and the score of the
# best field is kept; suited to searches where one field should match best.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball with cartoom aliens",
      "fields": ["title^10", "overview"],
      "type": "best_fields"
    }
  }
}
# Use the validate API with ?explain to see how the query is rewritten and
# scored, e.g. ((title:steve title:job) | (overview:steve overview:job)).
GET /movie/_validate/query?explain
{
  // "explain": true,
  "query": {
    "multi_match": {
      "type": "best_fields",
      "operator": "or",
      "query": "steve job",
      "fields": ["title^10", "overview"]
    }
  }
}
# dis_max is the counterpart of the best_fields mode: the best-scoring clause
# wins, and tie_breaker factors the other clauses in at 0.3 x their score.
GET /movie/_search
{
  "query": {
    "dis_max": {
      "tie_breaker": 0.3,
      "queries": [
        {
          "match": {
            "title": "basketball with cartoom aliens"
          }
        },
        {
          "match": {
            "overview": "basketball with cartoom aliens"
          }
        }
      ]
    }
  }
}
②most_fields:取命中的分值相加作为分数,同should match模式,加权共同影响模式
# most_fields: the analyzed terms are scored per field and the field scores
# are added together; suited to searches where every field should contribute.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "basketball with cartoom aliens",
      "fields": ["title^10", "overview^0.1"],
      "type": "most_fields"
    }
  }
}
# Use the validate API with ?explain to see how the query is rewritten and
# scored, e.g. ((title:steve title:job) | (overview:steve overview:job))~1.0.
GET /movie/_validate/query?explain
{
  // "explain": true,
  "query": {
    "multi_match": {
      "type": "most_fields",
      "operator": "or",
      "query": "steve job",
      "fields": ["title", "overview"]
    }
  }
}
③cross_fields:以分词为单位计算栏位总分,词的权重较高
# cross_fields: term-centric scoring. Each term is looked up in every field,
# its best score is kept, and the per-term scores are added together; suited
# to term-oriented matching.
GET /movie/_search
{
  "query": {
    "multi_match": {
      "query": "steve job",
      "fields": ["title", "overview"],
      "type": "cross_fields"
    }
  }
}
# Use the validate API with ?explain to see how the cross_fields query is
# rewritten and scored, e.g.
# blended(terms:[title:steve, overview:steve]) blended(terms:[title:job, overview:job]).
# The type must be cross_fields here; most_fields does not produce the
# blended(...) form shown above.
GET /movie/_validate/query?explain
{
  // "explain": true,
  "query": {
    "multi_match": {
      "query": "steve job",
      "fields": ["title", "overview"],
      "operator": "or",
      "type": "cross_fields"
    }
  }
}
# cross_fields with operator "and":
#   - "Peter" must appear in author_first_name or author_last_name
#   - "Smith" must appear in author_first_name or author_last_name
# The operator has to be "and" for both terms to be required; with "or" a
# document containing only one of them would also match.
# With most_fields a document such as "Smith Williams" could also be returned,
# because most_fields only needs any one field to match; the more fields
# match, the higher the score.
GET /forum/article/_search
{
  "query": {
    "multi_match": {
      "query": "Peter Smith",
      "type": "cross_fields",
      "operator": "and",
      "fields": ["author_first_name", "author_last_name"]
    }
  }
}
query string:方便的利用AND(+) OR(|) NOT(-)
# query_string query restricted to title, combining the two terms with OR.
GET /movie/_search
{
  "query": {
    "query_string": {
      "query": "steve OR jobs",
      "fields": ["title"]
    }
  }
}
filter单条件过滤查询
# Single-condition filter, comparable to a WHERE clause in a database query.
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "title": "steve"
        }
      }
    }
  }
}
多条件过滤查询
# Filter on several conditions at once and sort the hits by popularity,
# highest first.
# lte = less than or equal to, gte = greater than or equal to.
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "title": "steve" } },
        { "term": { "cast.name": "gaspard" } },
        {
          "range": {
            "release_date": { "lte": "2015/01/01" }
          }
        },
        {
          "range": {
            "popularity": { "gte": "25" }
          }
        }
      ]
    }
  },
  "sort": [
    {
      "popularity": { "order": "desc" }
    }
  ]
}
带match打分的filter过滤查询
# must clauses have to match (and are scored); the filter clauses narrow the
# result set down.
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "title": "steve" } },
        { "term": { "cast.name": "gaspard" } },
        {
          "range": {
            "release_date": { "lte": "2015/01/01" }
          }
        },
        {
          "range": {
            "popularity": { "gte": "25" }
          }
        }
      ],
      "must": [
        { "match": { "title": "Search" } },
        { "match": { "tagline": "Elasticsearch" } }
      ]
    }
  }
}
# filter decides which documents qualify; the should clauses drive the score.
GET /movie/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "title": "steve" } },
        { "term": { "cast.name": "gaspard" } },
        {
          "range": {
            "release_date": { "lte": "2015/01/01" }
          }
        },
        {
          "range": {
            "popularity": { "gte": "25" }
          }
        }
      ],
      "should": [
        { "match": { "title": "Search" } },
        { "match": { "tagline": "Elasticsearch" } }
      ]
    }
  }
}
优秀的搜索引擎必备:
查全率:正确的结果有n个,查询出来正确的有m 则 m/n
查准率:查出的n个文档有m个正确,则m/n
两者都需要提高,但一般不可兼得,可以通过调整排序位置,将正确的结果排在上面以提高用户体验
# function_score: custom scoring layered on top of an ordinary query.
GET /movie/_search
{
  "query": {
    "function_score": {
      // the original query; it produces the old score
      "query": {
        "multi_match": {
          "type": "most_fields",
          "operator": "or",
          "query": "steve job",
          "fields": ["title", "overview"]
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "popularity",  // the field whose value is used
            "modifier": "log2p",    // add 2 to the value, then take the logarithm
            "factor": 10            // the field value is multiplied by 10 first
          }
        }
      ],
      "score_mode": "sum",  // scores of the individual functions are added together
      "boost_mode": "sum"   // the result is finally added to the old score
    }
  }
}