es使用pinyin搜索,对应中文没有高亮显示

2 篇文章 0 订阅

刚开始的时候,索引 article 的 mapping

{
    "mapping":{
        "article":{
            "mappings":{
                "properties":{
                    "_class":{
                        "type":"text",
                        "fields":{
                            "keyword":{
                                "type":"keyword",
                                "ignore_above":256
                            }
                        }
                    },
                    "content":{
                        "type":"text",
                        "fields":{
                            "pinyin":{
                                "type":"text",
                                "analyzer":"pinyin"
                            }
                        },
                        "analyzer":"ik_max_word"
                    },
                    "createTime":{
                        "type":"long"
                    },
                    "title":{
                        "type":"text",
                        "fields":{
                            "pinyin":{
                                "type":"text",
                                "analyzer":"pinyin"
                            }
                        },
                        "analyzer":"ik_max_word"
                    }
                }
            }
        }
    }
}

title,content字段,拼音搜索,中文搜索,都没有问题,就是高亮显示的时候,拼音对应的中文,没有高亮
POST article/_doc

{
    "title":"测试",
    "content":"迪斯科浪费空间四点零分了是对方身上的"
}

GET /article/_search

{
    "query":{
        "multi_match":{
            "query":"shenshang",
            "fields":[
                "content.pinyin"
            ]
        }
    },
    "highlight":{
        "pre_tags":[
            "<h1>"
        ],
        "post_tags":[
            "</h1>"
        ],
        "fields":{
            "content.pinyin":{

            }
        }
    }
}

结果:

"highlight" : {
  "content.pinyin" : [
    "<h1></h1><h1></h1>迪斯科浪费空间四点零分了是对方身上的"
  ]
}

高亮标签全部打在了文本的最前面,而期望的效果是:标签包裹在拼音所对应的中文上。

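解决方法:在 settings 中自定义分析器(analyzer),用 ik_max_word 作为 tokenizer,再接一个 pinyin 类型的 filter。这样拼音词元沿用 ik 分词得到的中文词的位置(offset),高亮标签就能落在对应的中文上。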

  1. 删除旧索引
    DELETE /article
  2. 创建一个test2的索引
PUT test2
{
	"settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  }
}
  3. 设置setting
//设置setting,要先close
POST test2/_close
PUT test2/_settings
{
"settings":{
    "index":{
      "analysis":{
        "analyzer":{
          "ik_pinyin_analyzer":{
            "type":"custom",
            "tokenizer":"ik_max_word",
            "filter":["my_pinyin"]
          }
        },
        "filter":{
          "my_pinyin":{
             "type":"pinyin",
             //下面这些,非必须
             "keep_separate_first_letter": false,
             "keep_full_pinyin": true,
             "keep_original": false,
             "limit_first_letter_length": 10,
             "lowercase": true,
             "remove_duplicated_term": true
          }
        }
      }
    }
  }
}
  4. 设置mapping
PUT test2/_mappings
{
  "properties":{
    "id":{
      "type":"long"
    },
    "name":{
      "type":"text",
      "analyzer": "ik_pinyin_analyzer"
    }
  }
}
如果不先设置 settings,mapping 中是找不到 ik_pinyin_analyzer 的(前提是已经安装了 pinyin 插件);之前 analyzer 直接使用的是 "pinyin"。
  5. 放数据
    这样索引就设置成功了,要记得open
POST test2/_open

存放测试数据

POST test2/_doc
{
	"id":1,
	"name":"我们的时代"
}
  6. 查询
GET test2/_search
结果:
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "test2",
        "_type" : "_doc",
        "_id" : "MbLjiXoBlp4EId7Q-Fv9",
        "_score" : 1.0,
        "_source" : {
          "id" : 1,
          "name" : "我们的时代"
        }
      }
    ]
  }
}

正常查询,是可以的
7. 拼音查询

GET test2/_search
{
  "query":{
    "match": {
      "name": "wmen的"
    }
  },
  "highlight": {
    "pre_tags": ["<h1>"],
    "post_tags": ["</h1>"],
    "fields": {
      "name":{}
    }
  }
}
结果:
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.78201365,
    "hits" : [
      {
        "_index" : "test2",
        "_type" : "_doc",
        "_id" : "MbLjiXoBlp4EId7Q-Fv9",
        "_score" : 0.78201365,
        "_source" : {
          "id" : 1,
          "name" : "我们的时代"
        },
        "highlight" : {
          "name" : [
            "<h1>我们</h1><h1>的</h1>时代"
          ]
        }
      }
    ]
  }
}

这样,highlight里面的name,就是对应中文高亮了。

所有请求,都是在 kibana中操作

我的项目是 Java Spring Boot 整合 ES,在项目中只需要通过 @Setting 注解指定 settings 文件即可。

/**
 * Document entity stored in the {@code article} index.
 *
 * <p>The index settings are loaded from the classpath resource
 * {@code es/setting.json}. The {@code title} and {@code content} properties
 * are each mapped twice: the main field is analyzed with {@code ik_max_word},
 * and a {@code pinyin} sub-field is analyzed with {@code ik_pinyin_analyzer}.
 */
@Document(
        indexName = "article",
        useServerConfiguration = true
)
@Setting(settingPath = "es/setting.json")
@Data
public class Article {

    /** Document identifier. */
    @Id
    private String id;

    /** Article title; also searchable through {@code title.pinyin}. */
    @MultiField(
            mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word"),
            otherFields = {
                    @InnerField(suffix = "pinyin", type = FieldType.Text, analyzer = "ik_pinyin_analyzer")
            }
    )
    private String title;

    /** Article body; also searchable through {@code content.pinyin}. */
    @MultiField(
            mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word"),
            otherFields = {
                    @InnerField(suffix = "pinyin", type = FieldType.Text, analyzer = "ik_pinyin_analyzer")
            }
    )
    private String content;

    /** Creation time as a numeric value (presumably epoch milliseconds; confirm against the writer). */
    private Long createTime;
}

使用@Setting注解,在创建索引的时候,按照对应setting创建
setting.json位置 resources/es/setting.json
内容:

{
  "index": {
    "analysis": {
      "analyzer": {
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "my_pinyin"
          ]
        }
      },
      "filter": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_original": false,
          "limit_first_letter_length": 10,
          "lowercase": true,
          "remove_duplicated_term": true
        }
      }
    }
  }
}

内容就和刚才在kibana中操作的setting一样
对应 controller

 /**
  * Searches articles whose title or content matches the given text, either
  * in Chinese or in pinyin, and returns the hits with highlight fragments.
  *
  * <p>The text is matched against {@code title}, {@code title.pinyin},
  * {@code content} and {@code content.pinyin}; matched fragments in each of
  * those fields are wrapped in a red {@code <font>} tag. Results are sorted
  * by {@code createTime} in descending order.
  *
  * @param title text to search for; optional. When the parameter is absent,
  *              all articles are returned instead of failing the request.
  * @return the search hits produced by {@code elasticsearchRestTemplate}
  */
 @GetMapping("/searchTitle")
    public Object searchTitle(@RequestParam(value = "title", required = false) String title) {
        String preTag = "<font color='red'>";
        String postTag = "</font>";
        SortBuilder<FieldSortBuilder> sortBuilder = SortBuilders.fieldSort("createTime").order(SortOrder.DESC);
        Query build = new NativeSearchQueryBuilder()
                .withSort(sortBuilder)
                // The request parameter is optional, but multiMatchQuery rejects a null
                // query text with an IllegalArgumentException; fall back to match_all.
                .withQuery(title == null
                        ? QueryBuilders.matchAllQuery()
                        : QueryBuilders.multiMatchQuery(title, "title", "title.pinyin", "content", "content.pinyin"))
                .withHighlightFields(
                        new HighlightBuilder.Field("title").preTags(preTag).postTags(postTag),
                        new HighlightBuilder.Field("content").preTags(preTag).postTags(postTag),
                        new HighlightBuilder.Field("title.pinyin").preTags(preTag).postTags(postTag),
                        new HighlightBuilder.Field("content.pinyin").preTags(preTag).postTags(postTag))
                .build();

        return elasticsearchRestTemplate.search(build, Article.class);
    }

我搜索的,就是title,content和对应拼音
对应 kibana中代码

GET /article/_search
{
  "query":{
    "multi_match": {
        "query": "shenshang",
        "fields": ["title.pinyin","content.pinyin","title","content"]
     }
  },
  "highlight": {
    "pre_tags" : ["<tag1>"],
    "post_tags" : ["</tag1>"],
    "fields": {
      "title.pinyin": {},
      "content.pinyin": {},
      "content": {},
      "title": {}
    }
  }
}

java中结果 接口传参为 disike

{
    "totalHits": 1,
    "totalHitsRelation": "EQUAL_TO",
    "maxScore": "NaN",
    "scrollId": null,
    "searchHits": [
        {
            "index": "article",
            "id": "67IJinoBlp4EId7QBl4Z",
            "score": "NaN",
            "sortValues": [
                "1625813486007"
            ],
            "content": {
                "id": "67IJinoBlp4EId7QBl4Z",
                "title": "测试",
                "content": "迪斯科浪费空间四点零分了是对方身上的",
                "createTime": "1625813486007"
            },
            "highlightFields": {
                "content.pinyin": [
                    "<font color='red'>迪斯科</font>浪费空间<font color='red'>四</font>点零分了是对方身上的"
                ]
            },
            "innerHits": {},
            "nestedMetaData": null
        }
    ],
    "aggregations": null,
    "empty": false
}

对 es 理解的还不是很深,有不对的地方,欢迎指出

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值