1.创建同义词库
在elasticsearch-6.3.1\config\目录下创建文件夹analysis
在文件夹analysis下创建synonyms.txt文件
其中synonyms.txt用于存放同义词库,每行为一组互为同义的词,如:
西红柿,番茄,土豆,马铃薯
社保,公积金
(注意:词与词之间必须使用英文(半角)逗号分隔)
2.创建索引
索引创建流程:settings --> mapping --> put
在kibana6.3.1版本页面中可以创建索引了,如下:
PUT /test
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"jt_cn": {
"type": "custom",
"use_smart": "true",
"tokenizer": "ik_smart",
"filter": ["jt_tfr","jt_sfr"],
"char_filter": ["jt_cfr"]
},
"ik_smart": {
"type": "ik_smart",
"use_smart": "true"
},
"ik_max_word": {
"type": "ik_max_word",
"use_smart": "false"
}
},
"filter": {
"jt_tfr": {
"type": "stop",
"stopwords": [" "]
},
"jt_sfr": {
"type": "synonym",
"synonyms_path": "analysis/synonyms.txt"
}
},
"char_filter": {
"jt_cfr": {
"type": "mapping",
"mappings": [
"| => \\|"
]
}
}
}
}
}
}
注意上面jt_sfr过滤器中的"synonyms_path": "analysis/synonyms.txt"指定了同义词库的位置(相对于config目录的路径)
3.创建映射
PUT /test/haizhi/_mapping
{
"haizhi": {
"properties": {
"title": {
"analyzer": "jt_cn",
"term_vector": "with_positions_offsets",
"boost": 8,
"store": true,
"type": "text"
}
}
}
}
注意上面的"analyzer": "jt_cn"指定了刚才创建索引时定义的分词器jt_cn
4.测试-构建模拟数据
PUT /test/haizhi/1
{
"title": "番茄"
}
PUT /test/haizhi/2
{
"title": "西红柿"
}
PUT /test/haizhi/3
{
"title": "我是西红柿"
}
PUT /test/haizhi/4
{
"title": "我是番茄"
}
PUT /test/haizhi/5
{
"title": "土豆"
}
PUT /test/haizhi/6
{
"title": "aa"
}
5.测试-是否能检索到同义词
POST /test/haizhi/_search?pretty
{
"query": {
"match_phrase": {
"title": {
"query": "西红柿",
"analyzer": "jt_cn"
}
}
},
"highlight": {
"pre_tags": [
"<tag1>",
"<tag2>"
],
"post_tags": [
"</tag1>",
"</tag2>"
],
"fields": {
"title": {}
}
}
}
结果如下,发现能成功检索到同义词!(由于同义词库中"西红柿,番茄,土豆,马铃薯"写在同一行,它们互为同义词,因此"土豆"也会被检索到)
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0.7520058,
"hits": [
{
"_index": "test",
"_type": "haizhi",
"_id": "2",
"_score": 0.7520058,
"_source": {
"title": "西红柿"
},
"highlight": {
"title": [
"<tag1>西红柿</tag1>"
]
}
},
{
"_index": "test",
"_type": "haizhi",
"_id": "4",
"_score": 0.646255,
"_source": {
"title": "我是番茄"
},
"highlight": {
"title": [
"我是<tag1>番茄</tag1>"
]
}
},
{
"_index": "test",
"_type": "haizhi",
"_id": "1",
"_score": 0.46029136,
"_source": {
"title": "番茄"
},
"highlight": {
"title": [
"<tag1>番茄</tag1>"
]
}
},
{
"_index": "test",
"_type": "haizhi",
"_id": "3",
"_score": 0.43648314,
"_source": {
"title": "我是西红柿"
},
"highlight": {
"title": [
"我是<tag1>西红柿</tag1>"
]
}
},
{
"_index": "test",
"_type": "haizhi",
"_id": "5",
"_score": 0.2876821,
"_source": {
"title": "土豆"
},
"highlight": {
"title": [
"<tag1>土豆</tag1>"
]
}
}
]
}
}