elasticsearch ik配置动态同义词

使用环境

安装插件

下载对应的elasticsearch-analysis-dynamic-synonym-5.1.1.zip, 解压到本地的elasticsearch/plugins目录下, 重新启动es

第一种方式本地文件

说明:
  • 对于本地文件:主要通过文件的修改时间戳(Modify time)来判断是否要重新加载
  • 在elasticsearch/config目录下,建立analysis目录, 并在analysis目录下放入synonym.txt, 在文件首行加入下面一行同义词,来进行测试
    西红柿, 番茄, 圣女

es设置索引和自定义解析器

PUT /megacorp
{
  "mappings": {
    "employee": {
      "properties": {
        "name":{
          "type": "text",
          "analyzer": "ik-index",  //指定索引时候用的解析器
          "search_analyzer": "ik-smart" //指定搜索时候用的解析器
        }
      }
    }
  }
  ,
  "settings": {
    "analysis": {
      "filter": {
        "local_synonym" : {
            "type" : "dynamic_synonym",
            "synonyms_path" : "analysis/synonym.txt"  
        }
      },
      "analyzer": {
        "ik-index": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
              "local_synonym"   //对同义词进行了过滤
           ]
        },
        "ik-smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
              "local_synonym"
           ]
        }
      }
    }
  }
}
设置好后,可以用以下命令查看同义词是否配置成功:
GET /megacorp/_analyze
{
  "analyzer": "ik-index",
  "text": "西红柿"
}
正确分词结果如下:
{
  "tokens": [
    {
      "token": "西红柿",
      "start_offset": 0,
      "end_offset": 3,
      "type": "CN_WORD",
      "position": 0
    },
    {
      "token": "番茄",
      "start_offset": 0,
      "end_offset": 3,
      "type": "SYNONYM",
      "position": 0
    },
    {
      "token": "圣女",
      "start_offset": 0,
      "end_offset": 3,
      "type": "SYNONYM",
      "position": 0
    }
  ]
}
插入2条数据:
PUT /megacorp/employee/1
{
    "name" : "圣女果"
}

PUT /megacorp/employee/2
{
    "name" : "番茄"
}
搜索西红柿, 会搜索出番茄和圣女果的记录:
GET /megacorp/employee/_search
{
    "query":{
      "match": {
        "name": "西红柿"
      } 
    }
}

第二种方式远程接口

说明:
  • 这个 HTTP 接口的响应需要返回两个头部:Last-Modified 和 ETag。只要其中任意一个发生变化,该插件就会重新获取同义词并更新。
本地写个接口
http://localhost/synonym/list 
该接口的响应需要设置以下三项:Last-Modified、ETag 和 Content-Type(text/plain)
$response->setLastModified($lastModified);
$response->setEtag($etag, true);
$response->headers->set('Content-Type', 'text/plain');
注:
nginx 在开启了 gzip 之后,如果有 ETAG 则会调用 ngx_http_clear_etag 将其清除,
解决的办法很简单:
只要 PHP 返回的 ETag 是弱 ETag(weak ETag),就不会被 nginx 清除,一切都会正常起来。弱 ETag 是相对于强 ETag(默认形式)而言的,写法是在 ETag 值前面加上 W/;上面 setEtag($etag, true) 的第二个参数 true 就是用来生成弱 ETag 的。例如:
W/"db8b38e8a3257a2f195b727eceb2c5d3"
下面是设置远程, 本地同义词的配置

PUT /megacorp
{
  "settings": {
    "index": {
      "number_of_shards": "5",
      "number_of_replicas": "1",
      "analysis": {
        "filter": {
          "remote_synonym": {
            "type": "dynamic_synonym",
            "synonyms_path": "http://192.168.78.37:10001/synonym.txt",
            "interval": 10
          },
          "edgeNgramFilter": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 50
          }
        },
        "analyzer": {
          "ngramIndex": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": [
              "edgeNgramFilter",
              "lowercase"
            ]
          },
          "ngramLowercase": {
            "filter": [
              "lowercase"
            ],
            "type": "custom",
            "tokenizer": "ngramTokenizer"
          },
          "ikIndex": {
            "filter": [
              "remote_synonym"
            ],
            "type": "custom",
            "tokenizer": "ik_max_word"
          },
          "ikSearch": {
            "type": "custom",
            "tokenizer": "ik_smart"
          },
          "keywordLowercase": {
            "type": "custom",
            "filter": [
              "lowercase"
            ],
            "tokenizer": "keyword"
          }
        },
        "tokenizer": {
          "ngramTokenizer": {
            "type": "nGram",
            "min_gram": "1",
            "max_gram": "3"
          }
        }
      }
    }
  },
  "mappings": {
    "type1": {
      "properties": {
        "title": {
          "type": "text",
          "fields": {
            "ik": {
              "analyzer": "ikIndex",
              "search_analyzer": "ikSearch",
              "type": "text"
            },
            "keywordLowercase": {
              "analyzer": "keywordLowercase",
              "type": "text"
            },
            "ngram": {
              "analyzer": "ngramLowercase",
              "type": "text"
            }
          }
        }
      }
    }
  }
}

{
  "mappings": {
    "employee": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "ik-index",  //指定索引时候用的解析器
          "search_analyzer": "ik-smart" //指定搜索时候用的解析器
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "filter": {
        "remote_synonym": {
          "type": "dynamic_synonym",
          "synonyms_path": "http://localhost/synonym/list",
          "interval": 60 // 每60s调取一次接口
        },
        "local_synonym": {
          "type": "dynamic_synonym",
          "synonyms_path": "analysis/synonym.txt"
        }
      },
      "analyzer": {
        "ik-index": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "remote_synonym", //对远程同义词进行了过滤
            "local_synonym" //对本地同义词进行了过滤
          ]
        },
        "ik-smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "local_synonym"
          ]
        }
      }
    }
  }
}
  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值