boost 控制权重
GET /forum/article/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "java" } }
      ],
      "should": [
        {
          // The expanded form of match carries the search text under "query";
          // the expanded form of term uses "value" there instead, because term
          // input is not analyzed.
          "match": {
            "title": {
              "query": "elasticsearch",
              // boost is a sibling of "query" and weights only this clause.
              "boost": 1
            }
          }
        },
        {
          "match": {
            "title": {
              "query": "hadoop",
              "boost": 5
            }
          }
        }
      ]
    }
  }
}
best fields 策略,dis_max关键字(尽可能让在单一field中匹配到多个关键字的文档得分高些)
GET /forum/article/_search
{
  // A single condition can place match directly under "query"; combining
  // several conditions (and / or / not) requires wrapping them in bool.
  "query": {
    // bool accepts must, must_not and should clause lists.
    "bool": {
      "should": [
        { "match": { "title": "java solution" } },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}
GET /forum/article/_search
{
  "query": {
    "dis_max": {
      // dis_max takes its alternatives in a "queries" array, which reads much
      // like the should list of a bool query.
      "queries": [
        { "match": { "title": "java solution" } },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}
dis_max只是取分数最高的那个query的分数;如果使用了tie_breaker,那么其他query的分数也会被计算在其中,只是其他的分数要先乘以tie_breaker(tie_breaker是0到1之间的小数)
GET /forum/article/_search
{
  "query": {
    "dis_max": {
      // tie_breaker is used together with dis_max and sits at the same level
      // as "queries".
      "tie_breaker": 0.7,
      "queries": [
        { "match": { "title": "java beginner" } },
        { "match": { "content": "java beginner" } }
      ]
    }
  }
}
去长尾操作,minimum_should_match
GET /forum/article/_search
{
  // match or multi_match directly under "query" expresses a single-level
  // search; clauses with a logical relationship must go through bool.
  "query": {
    "multi_match": {
      // "query" and "fields" are the two basic multi_match parameters.
      "query": "java solution",
      "fields": [
        "title",
        "content"
      ],
      // type, tie_breaker and minimum_should_match adjust scoring and trim the
      // long tail so that results are more precise.
      "type": "best_fields",
      "tie_breaker": 0.3,
      "minimum_should_match": "50%"
    }
  }
}
most_fields 是让关键字匹配尽可能多的field;best_fields 是让单独的一个field匹配尽可能多的关键字
GET /forum/article/_search
{
  "query": {
    "multi_match": {
      // most_fields searches the listed fields together: here the main field
      // and its "std" sub-field.
      "type": "most_fields",
      "query": "learning courses",
      "fields": [
        "sub_title",
        "sub_title.std"
      ]
    }
  }
}
copy_to:把多个field的值复制合并到一个新的field(这个新field不会出现在_source里,只存在于索引内部,但可以被搜索)
PUT /forum/_mapping/article
{
  "properties": {
    // Both name parts are copied into new_author_full_name, so a single field
    // can be searched for the full name.
    // The type is "text" rather than the legacy "string", in line with the
    // other mappings in these notes; "string" is not accepted by newer
    // Elasticsearch versions.
    "new_author_first_name": {
      "type": "text",
      "copy_to": "new_author_full_name"
    },
    "new_author_last_name": {
      "type": "text",
      "copy_to": "new_author_full_name"
    },
    // Target of the two copy_to declarations above.
    "new_author_full_name": {
      "type": "text"
    }
  }
}
短语搜索:match_phrase 就是说该field中必须同时包含java 和 spark,并且两个词必须相邻、顺序一致(默认slop为0)
GET /forum/article/_search
{
  "query": {
    // Short form of match_phrase: the field name maps directly to the phrase.
    "match_phrase": { "content": "java spark" }
  }
}
proximity match
利用match_phrase + slop
GET /forum/article/_search
{
  "query": {
    "match_phrase": {
      "content": {
        // slop is the number of position moves the phrase terms may make and
        // still match. Moves can also reorder the terms: matching these two
        // words in reversed order costs 2 moves.
        "slop": 4,
        "query": "language java"
      }
    }
  }
}
match和match_phrase混合使用实现召回率和精确度平衡
GET /forum/article/_search
{
  "query": {
    "bool": {
      // The match clause decides which documents are returned (recall).
      "must": [
        { "match": { "content": "java language" } }
      ],
      // The match_phrase clause only adds score to documents where the terms
      // are close together (precision).
      "should": [
        {
          "match_phrase": {
            "content": {
              "query": "java language",
              "slop": 5
            }
          }
        }
      ]
    }
  }
}
rescoring 重打分
GET /forum/article/_search
{
  // First pass: the plain match query selects and ranks the candidates.
  "query": {
    "match": { "content": "java language" }
  },
  // Second pass: the top window_size hits are scored again with the phrase
  // query.
  "rescore": {
    "window_size": 1,
    "query": {
      "rescore_query": {
        "match_phrase": {
          "content": {
            "query": "java language",
            "slop": 2
          }
        }
      }
    }
  }
}
prefix 前缀搜索
GET /my_index/my_type/_search
{
  "query": {
    // Matches titles containing a term that starts with the given value.
    "prefix": {
      "title": { "value": "c3k" }
    }
  }
}
wildcard 通配符搜索
GET /my_index/my_type/_search
{
  "query": {
    // In a wildcard pattern "?" stands for exactly one character and "*" for
    // any number of characters.
    "wildcard": {
      "title": { "value": "c?k*" }
    }
  }
}
regexp 正则
GET /my_index/my_type/_search
{
  "query": {
    // Pattern: "c", then one digit, then one or more further characters.
    "regexp": {
      "title": { "value": "c[0-9].+" }
    }
  }
}
match_phrase_prefix 搜索推荐
GET /my_index/my_type/_search
{
  "query": {
    // Expanded form: the last word ("w") is treated as a prefix.
    "match_phrase_prefix": {
      "title": {
        "query": "hello w",
        // max_expansions limits how many terms the prefix may expand to.
        "max_expansions": 2,
        "slop": 2
      }
    }
  }
}
------------或者----------
GET /my_index/my_type/_search
{
  "query": {
    // Short form: field name mapped directly to the text, default options.
    "match_phrase_prefix": { "title": "hello w" }
  }
}
ngram 方式创建 搜索推荐
PUT /my_index
{
  "settings": {
    "analysis": {
      // Token filter that emits the leading 1 to 10 characters of every token.
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 10
        }
      },
      // Custom analyzer: standard tokenizer, then lowercase, then the
      // edge n-gram filter defined above.
      "analyzer": {
        "autocomplete": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "autocomplete_filter"]
        }
      }
    }
  }
}
PUT /my_index/_mapping/my_type
{
  "properties": {
    "title": {
      "type": "text",
      // The custom analyzer has to be named explicitly when the mapping is
      // created; it is applied at index time.
      "analyzer": "autocomplete",
      // Search text is analyzed with the standard analyzer instead.
      "search_analyzer": "standard"
    }
  }
}
------------------------------------
POST /my_index/_analyze // try out the custom analyzer
{
  "analyzer": "autocomplete",
  "text": "hello world"
}
GET /my_index/my_type/_search // search suggestions
{
  "query": {
    // A plain match_phrase is used against the edge n-gram indexed field.
    "match_phrase": { "title": "hello w" }
  }
}
搜索优化(4种)
---------------------------
//1重构查询结构
GET /forum/article/_search
{
  "query": {
    "bool": {
      // Require "java" ...
      "must": [
        { "term": { "content": "java" } }
      ],
      // ... and exclude every document that contains "spark".
      "must_not": [
        { "term": { "content": "spark" } }
      ]
    }
  }
}
-------------------------
//2.利用boosting,把negative部分的分数降下来
GET /forum/article/_search
{
  "query": {
    "boosting": {
      // Documents must match the positive query to be returned.
      "positive": {
        "match": { "content": "java" }
      },
      // Documents that also match the negative query stay in the results but
      // have their score multiplied by negative_boost.
      "negative": {
        "match": { "content": "spark" }
      },
      "negative_boost": 0.2
    }
  }
}
//bool和boosting是同一个等级的查询类型
//bool 下面放置的是must, must_not, should;boosting下面放置的是positive, negative, negative_boost
--------------------------
//3.利用boost
GET /forum/article/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "java spark",
              // A boost below 1 lowers the weight of this clause.
              "boost": 0.2
            }
          }
        },
        { "match": { "content": "java" } }
      ]
    }
  }
}
---------------------------
//4.constant_score 用于不需要计分的field(匹配到的文档得到固定分数)
//constant_score下面应接filter;早期版本也接受query,二者效果相同,但query写法已被废弃
GET /forum/article/_search
{
  "query": {
    "bool": {
      "should": [
        {
          // Every document passing the filter receives the same fixed score.
          "constant_score": {
            "filter": {
              "bool": {
                "must_not": [
                  { "match": { "title": "java" } }
                ]
              }
            }
          }
        }
      ]
    }
  }
}
-----
GET /forum/article/_search
{
  "query": {
    "bool": {
      // Each clause contributes a fixed score when it matches, so a document
      // is ranked by how many of the clauses it satisfies.
      "should": [
        {
          // constant_score wraps its clause under "filter"; the older "query"
          // key is deprecated.
          "constant_score": {
            "filter": {
              "match": { "title": "java" }
            }
          }
        },
        {
          "constant_score": {
            "filter": {
              "match": { "title": "spark" }
            }
          }
        }
      ]
    }
  }
}
function_score:让自定义字段参与计分
GET /forum/article/_search
{
  "query": {
    "function_score": {
      // Base relevance comes from this query.
      "query": {
        "multi_match": {
          "query": "java spark",
          "fields": ["title", "content"]
        }
      },
      // The follower_num field value, scaled by factor and passed through the
      // log1p modifier, becomes the function score.
      "field_value_factor": {
        "field": "follower_num",
        "modifier": "log1p",
        "factor": 1.2
      },
      // boost_mode "sum" adds the function score to the query score;
      // max_boost caps the function score.
      "boost_mode": "sum",
      "max_boost": 0.5
    }
  }
}
fuzziness 误输入纠错
GET /my_index/my_type/_search
{
  "query": {
    // With match, the field name comes directly under "match".
    "match": {
      "text": {
        "query": "SURPIZE ME",
        // "and" requires every term to match; fuzziness allows each term to be
        // off by up to 2 edits.
        "operator": "and",
        "fuzziness": 2
      }
    }
  }
}
--------------
GET /my_index/my_type/_search
{
  "query": {
    // No match wrapper here: the field name sits directly under "fuzzy" and
    // the term goes under "value".
    "fuzzy": {
      "text": {
        "value": "surprize",
        // fuzziness is an edit distance; the largest accepted value is 2.
        "fuzziness": 2
      }
    }
  }
}
中文分词器的创建
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        // The "text" field is analyzed with the ik_max_word Chinese analyzer.
        "text": {
          "analyzer": "ik_max_word",
          "type": "text"
        }
      }
    }
  }
}
-------
GET /my_index/_analyze // the method (GET) must be upper-case, otherwise Console offers no autocomplete hints
{
  "analyzer": "ik_max_word",
  "text": "男子偷上万元发红包求交女友 被抓获时仍然单身"
}
----
GET /my_index/my_type/_search
{
  "query": {
    // Plain match against the field mapped with the ik_max_word analyzer.
    "match": { "text": "16岁少女结婚好还是单身好?" }
  }
}