DSL

# Create the foodie-items index with one primary shard and zero replicas.
PUT /foodie-items/
{
  "settings": {
    "index": { "number_of_shards": "1", "number_of_replicas": "0" }
  }
}
# List all indices, with column headers (?v).
GET /_cat/indices?v

# Search all documents stored under index "forum", type "article".
GET /forum/article/_search

# Show the mapping of type "article" in index "forum".
GET /forum/_mapping/article

# Fetch a single document by its _id.
GET /forum/article/1

# Show how the analyzer of the articleID field tokenizes a sample value.
GET /forum/_analyze
{ "field": "articleID", "text": "XHDK-A-1293-#fJ3" }
# _bulk batches index/update/delete operations (mget is the batched read counterpart).
# Seed four forum articles: every action line is followed by its document source line.
POST /forum/article/_bulk
{"index": {"_id": 1}}
{"articleID": "XHDK-A-1293-#fJ3", "userID": 1, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 2}}
{"articleID": "KDKE-B-9947-#kL5", "userID": 1, "hidden": false, "postDate": "2017-01-02"}
{"index": {"_id": 3}}
{"articleID": "JODL-X-1937-#pV7", "userID": 2, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 4}}
{"articleID": "QQPX-R-3956-#aD8", "userID": 2, "hidden": true, "postDate": "2017-01-02"}


# A string mapped as type=text gets two fields by default: the field itself
# (e.g. articleID), which is analyzed, and a <field>.keyword sub-field, which
# is not analyzed and keeps at most 256 characters.

# term against the analyzed field: returns no hits.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "articleID": "KDKE-B-9947-#kL5" }
      }
    }
  }
}

# term against the .keyword sub-field: returns exactly one hit.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "articleID.keyword": "KDKE-B-9947-#kL5" }
      }
    }
  }
}

# Find posts whose postDate is 2017-01-01 OR whose articleID is XHDK-A-1293-#fJ3,
# and whose postDate is NOT 2017-01-02. SQL equivalent:
#   select * from forum.article
#   where (postDate = '2017-01-01' or articleID = 'XHDK-A-1293-#fJ3')
#     and postDate != '2017-01-02';
# The exact-value match on the article ID must use the non-analyzed
# articleID.keyword sub-field: a term filter on the analyzed articleID field
# returns no hits for the full ID, so that branch of the OR would never match.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "postDate": "2017-01-01" } },
            { "term": { "articleID.keyword": "XHDK-A-1293-#fJ3" } }
          ],
          "must_not": {
            "term": { "postDate": "2017-01-02" }
          }
        }
      }
    }
  }
}


# Find posts whose articleID is XHDK-A-1293-#fJ3, OR whose articleID is
# JODL-X-1937-#pV7 AND whose postDate is 2017-01-01. SQL equivalent:
#   select * from forum.article
#   where articleID = 'XHDK-A-1293-#fJ3'
#      or (articleID = 'JODL-X-1937-#pV7' and postDate = '2017-01-01')
# The AND group is expressed as a bool/must nested inside the outer bool/should.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "articleID.keyword": "XHDK-A-1293-#fJ3" } },
            {
              "bool": {
                "must": [
                  { "term": { "articleID.keyword": "JODL-X-1937-#pV7" } },
                  { "term": { "postDate": "2017-01-01" } }
                ]
              }
            }
          ]
        }
      }
    }
  }
}

# term:  {"field": "value"}
# terms: {"field": ["value1", "value2"]}
#
# terms is the counterpart of SQL's IN:
#   select * from tbl where col in ("value1", "value2")

# Add a tag array to each existing post via partial-document updates.
POST /forum/article/_bulk
{"update": {"_id": "1"}}
{"doc": {"tag": ["java", "hadoop"]}}
{"update": {"_id": "2"}}
{"doc": {"tag": ["java"]}}
{"update": {"_id": "3"}}
{"doc": {"tag": ["hadoop"]}}
{"update": {"_id": "4"}}
{"doc": {"tag": ["java", "elasticsearch"]}}

# Find posts whose articleID is either KDKE-B-9947-#kL5 or QQPX-R-3956-#aD8.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "articleID.keyword": ["KDKE-B-9947-#kL5", "QQPX-R-3956-#aD8"]
        }
      }
    }
  }
}

# Find posts whose tag array contains "java" (other tags may be present too).
# Only "java" is listed here; add more values to the array to match any of them.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": { "tag": ["java"] }
      }
    }
  }
}

# Refine the previous search: match only posts whose tag list is exactly ["java"].
# First store the number of tags on each post in a tag_cnt field...
POST /forum/article/_bulk
{"update": {"_id": "1"}}
{"doc": {"tag_cnt": 2}}
{"update": {"_id": "2"}}
{"doc": {"tag_cnt": 1}}
{"update": {"_id": "3"}}
{"doc": {"tag_cnt": 1}}
{"update": {"_id": "4"}}
{"doc": {"tag_cnt": 2}}

# ...then require both tag_cnt == 1 and tag containing "java".
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "term": { "tag_cnt": 1 } },
            { "terms": { "tag": ["java"] } }
          ]
        }
      }
    }
  }
}

# Add a view-count field to each post.
POST /forum/article/_bulk
{"update": {"_id": "1"}}
{"doc": {"view_cnt": 30}}
{"update": {"_id": "2"}}
{"doc": {"view_cnt": 50}}
{"update": {"_id": "3"}}
{"doc": {"view_cnt": 100}}
{"update": {"_id": "4"}}
{"doc": {"view_cnt": 80}}

# Find posts with a view count between 30 and 60, both bounds inclusive (gte/lte).
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "view_cnt": { "gte": 30, "lte": 60 }
        }
      }
    }
  }
}

# Find posts published within the last 30 days.
# Index one more post to have a candidate for the date-range filter.
# NOTE(review): postDate below is a fixed date (2020-04-20); it only falls inside
# the now-30d window when this is replayed close to that date - adjust as needed.
POST /forum/article/_bulk
{"index": {"_id": 5}}
{"articleID": "DHJK-B-1395-#Ky5", "userID": 3, "hidden": false, "postDate": "2020-04-20", "tag": ["elasticsearch"], "tag_cnt": 1, "view_cnt": 10}

# Date math: "now-30d" is the current time minus 30 days.
GET /forum/article/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "postDate": { "gte": "now-30d" }
        }
      }
    }
  }
}

# Add a title field to each post.
POST /forum/article/_bulk
{"update": {"_id": "1"}}
{"doc": {"title": "this is java and elasticsearch blog"}}
{"update": {"_id": "2"}}
{"doc": {"title": "this is java blog"}}
{"update": {"_id": "3"}}
{"doc": {"title": "this is elasticsearch blog"}}
{"update": {"_id": "4"}}
{"doc": {"title": "this is java, elasticsearch, hadoop blog"}}
{"update": {"_id": "5"}}
{"doc": {"title": "this is spark blog"}}

# Find blogs whose title contains "java" OR "elasticsearch".
# Unlike the earlier term queries this is not an exact-value lookup but a
# full-text search: match is the full-text query. If the target field is
# not_analyzed, a match query behaves like a term query.
GET /forum/article/_search
{
  "query": {
    "match": { "title": "java elasticsearch" }
  }
}

# Find blogs whose title contains "java" AND "elasticsearch":
# operator "and" requires every search keyword to match.
GET /forum/article/_search
{
  "query": {
    "match": {
      "title": { "query": "java elasticsearch", "operator": "and" }
    }
  }
}

# Find blogs whose title contains at least 3 of the 4 keywords
# java, elasticsearch, spark, hadoop.
# minimum_should_match sets how many of the given keywords must match
# before a document is returned.
GET /forum/article/_search
{
  "query": {
    "match": {
      "title": {
        "query": "java elasticsearch spark hadoop",
        "minimum_should_match": "75%"
      }
    }
  }
}

# The same requirement written out as a bool query with one should clause per keyword.
GET /forum/article/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "title": "java" } },
        { "match": { "title": "elasticsearch" } },
        { "match": { "title": "hadoop" } },
        { "match": { "title": "spark" } }
      ],
      "minimum_should_match": 3
    }
  }
}
# 1. Full-text search over several values can be written two ways: a match query, or bool/should.
# 2. Result precision is controlled with the "and" operator or with minimum_should_match.
# When a match query carries several terms, ES converts it into the bool syntax under the hood.

# Clause weighting with boost: raising the weight of one search clause makes
# documents matching it get a higher relevance score than documents matching
# another clause, so they are returned first.
# Every clause has a weight of 1 by default.
# Here all hits must contain "blog"; "elasticsearch" counts twice as much as "java".
GET /forum/article/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "blog" } }
      ],
      "should": [
        { "match": { "title": { "query": "java" } } },
        { "match": { "title": { "query": "elasticsearch", "boost": 2 } } }
      ]
    }
  }
}

# Multi-field search.
# Add a content field to each post.
POST /forum/article/_bulk
{"update": {"_id": "1"}}
{"doc": {"content": "i like to write best elasticsearch article"}}
{"update": {"_id": "2"}}
{"doc": {"content": "i think java is the best programming language"}}
{"update": {"_id": "3"}}
{"doc": {"content": "i am only an elasticsearch beginner"}}
{"update": {"_id": "4"}}
{"doc": {"content": "elasticsearch and hadoop are all very good solution, i am a beginner"}}
{"update": {"_id": "5"}}
{"doc": {"content": "spark is best big data solution based on scala ,an programming language similar to java"}}

# Find posts where title or content contains "java" or "solution".
GET /forum/article/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "title": "java solution" } },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}

# Best-fields strategy: a document in which a single field matches as many
# keywords as possible should rank first, rather than a document in which many
# fields each match only a few keywords.
# dis_max takes the score of the highest-scoring query among the listed queries.
GET /forum/article/_search
{
  "query": {
    "dis_max": {
      "queries": [
        { "match": { "title": "java solution" } },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}

03_结构化搜索_filter执行原理深度剖析(bitset机制与caching机制)

(1)在倒排索引中查找搜索串,获取document list

(2)为每个在倒排索引中搜索到的结果,构建一个bitset,[0, 0, 0, 1, 0, 1]

(3)遍历每个过滤条件对应的bitset,优先从最稀疏的开始搜索,查找满足所有条件的document

(4)caching bitset,跟踪query,在最近256个query中超过一定次数的过滤条件,缓存其bitset。对于小segment(文档数<10000,或占比<3%),不缓存bitset。

(5)filter大部分情况下来说,在query之前执行,先尽量过滤掉尽可能多的数据

(6)如果document有新增或修改,那么cached bitset会被自动更新

(7)以后只要是有相同的filter条件的,会直接来使用这个过滤条件对应的cached bitset

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值