SpringBoot集成elasticsearch实现汉字、拼音、同义词检索并高亮显示

一、安装ik分词器与拼音分词器插件

ik分词器下载地址:https://github.com/infinilabs/analysis-ik/releases

pinyin分词器下载地址:https://github.com/infinilabs/analysis-pinyin/releases

下载与es版本一致的插件,将两个zip包分别解压到es安装目录plugins文件夹下各自的子目录中(例如plugins/ik、plugins/pinyin),然后重启es使插件生效

二、自定义索引映射

1、在项目resources/es目录下创建索引映射文件file-mapping.json

{
  "dynamic": false,
  "properties": {
    "name": {
      "type": "text",
      "analyzer": "ik_smart",
      "fields": {
        // Sub-field indexed with pinyin_analyzer; query text is analyzed with ik_smart
        "pinyin": {
          "type": "text",
          "analyzer": "pinyin_analyzer",
          "search_analyzer": "ik_smart"
        },
        // Sub-field indexed with synonym_analyzer; query text is analyzed with ik_smart
        "synonymy": {
          "type": "text",
          "analyzer": "synonym_analyzer",
          "search_analyzer": "ik_smart"
        }
      }
    },
    // Identifier: mapped as keyword so the value is indexed as a single exact term
    // instead of being split by a text analyzer
    "id": {
      "type": "keyword"
    },
    "fileContent": {
      "type": "text",
      "analyzer": "ik_smart",
      "fields": {
        // Same pinyin / synonym sub-fields as "name"
        "pinyin": {
          "type": "text",
          "analyzer": "pinyin_analyzer",
          "search_analyzer": "ik_smart"
        },
        "synonymy": {
          "type": "text",
          "analyzer": "synonym_analyzer",
          "search_analyzer": "ik_smart"
        }
      }
    }
  }
}

2、在项目resources/es目录下创建索引设置文件file-setting.json

{
  "index": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "analysis": {
    "analyzer": {
      // Custom analyzer: ik_max_word tokens passed through the "py" pinyin filter
      "pinyin_analyzer": {
        "tokenizer": "ik_max_word",
        "filter": "py"
      },
      // Custom analyzer: ik_max_word tokens expanded with synonyms, then lowercased
      "synonym_analyzer": {
        "tokenizer": "ik_max_word",
        "filter": [
          "synonym",
          "lowercase"
        ]
      }
    },
    "filter": {
      "py": {
        "type": "pinyin",    // pinyin token filter: converts Chinese text to pinyin
        "keep_full_pinyin": true,    // keep the full pinyin of each character, e.g. 刘德华 -> liu, de, hua
        "keep_joined_full_pinyin": true,    // also emit the joined full pinyin, e.g. 刘德华 -> liudehua
        "keep_first_letter": true,    // emit the joined first letters, e.g. 刘德华 -> ldh
        "keep_separate_first_letter": false,    // do not emit each first letter as its own term (l, d, h)
        "keep_original": true,    // keep the original input term alongside the pinyin terms
        "remove_duplicated_term": true,    // drop duplicated terms from the output
        "none_chinese_pinyin_tokenize": true,    // split non-Chinese letters into separate pinyin terms when they form pinyin
        "lowercase": true    // lowercase non-Chinese letters
      },
      "synonym": {
        // "synonym" rather than "synonym_graph": this filter runs in an index-time analyzer,
        // and synonym_graph is intended for search-time analyzers only
        "type": "synonym",
        "synonyms_path": "analysis/synonyms.txt"    // synonyms file path, relative to the Elasticsearch config directory
      }
    }
  }
}

3、将同义词文本文件synonyms.txt放到es安装目录的config/analysis文件夹下(对应上面配置中的synonyms_path:analysis/synonyms.txt)

按照,依照,遵照,遵守,服从,根据,依据,遵从,遵循
依照,按照,遵照,遵守,根据,依据,遵从,遵循
遵照,按照,依照,遵守,恪守,服从,根据,坚守,遵从,遵循,奉命,遵命,听命,从命
安置,安排,安顿,部署,布置,计划,安装,安放,安设
安排,安置,安顿,部署,布置,计划,调节,左右,调整,操纵,摆布,支配,摆设,陈设,打算,调度,调理,放置,铺排,设计,调动,策画,张罗
安顿,安置,安排,部署,布置,计划,放置,安插,安放
暴发,爆发
爆发,暴发,迸发,产生,发生,发作,爆出
变换,变幻,变更,调换,改变,改动,改换,转换
变幻,变换,幻化
辩别,辨别,辨认,鉴别

4、创建es索引实体类

import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;
import org.springframework.data.elasticsearch.annotations.Setting;

import java.util.List;

/**
 * Elasticsearch document holding a file's id, name and text content.
 *
 * <p>Index settings and field mappings are loaded from the JSON resources referenced by
 * {@code @Setting} and {@code @Mapping}. Lombok generates the accessors, and setters return
 * {@code this} because of {@code @Accessors(chain = true)}.
 */
@Document(indexName = FileEsInfo.INDEX_NAME)
@Data
@Accessors(chain = true)
// Resource names must match the files actually created: file-setting.json / file-mapping.json.
@Setting(settingPath = "/es/file-setting.json")
@Mapping(mappingPath = "/es/file-mapping.json")
public class FileEsInfo {

    /** Name of the Elasticsearch index backing this document type. */
    public static final String INDEX_NAME = "file_content";

    /** Document identifier. */
    @ApiModelProperty("编号")
    @Id
    private String id;

    /** File name. */
    @ApiModelProperty("文件名称")
    private String name;

    /** File content. */
    @ApiModelProperty("文件内容")
    private String fileContent;
}

三、测试es构建分析器检索效果

(一)测试拼音分词器效果

1、测试拼音检索

2、测试汉字检索

3、测试拼音汉字混合检索

(二)测试同义词分词器效果

四、SpringBoot集成es实现搜索

(一)添加依赖

<!-- Elasticsearch High Level REST Client, pinned to version 7.17.16.
     NOTE(review): the entity class imports org.springframework.data.elasticsearch annotations,
     which this artifact does not contain; presumably a Spring Data Elasticsearch dependency
     is declared elsewhere - confirm. -->
<dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.17.16</version>
</dependency>

(二)实现搜索并添加高亮

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import com.fasterxml.jackson.databind.ObjectMapper;

@Resource
private RestHighLevelClient restHighLevelClient;

@Resource
private ObjectMapper objectMapper;

/**
 * Searches the file index on the synonym and pinyin sub-fields of the file name and
 * content, and returns the hits with highlighted fragments applied.
 *
 * <p>When {@code keyword} is non-empty, a document matches if at least one of the four
 * sub-fields matches. When it is null or empty, no clause is added to the bool query.
 *
 * @param keyword the search text (Chinese, pinyin, or a mix); may be null or empty
 * @return the matched documents, with name/content replaced by highlight output where available
 * @throws IOException if the search request to Elasticsearch fails
 */
@Override
public List<FileEsInfo> getFileInfoList(String keyword) throws IOException {
    // Sub-fields that are both queried and highlighted; kept in one place so the
    // query and the highlighter cannot drift apart.
    String[] searchFields = {
        "name.synonymy", "fileContent.synonymy", "name.pinyin", "fileContent.pinyin"
    };

    // Build the bool query: any one of the sub-fields matching is enough.
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    if (StringUtils.isNotEmpty(keyword)) {
        for (String field : searchFields) {
            boolQueryBuilder.should(QueryBuilders.matchQuery(field, keyword));
        }
        boolQueryBuilder.minimumShouldMatch(1);
    }

    // Wrap matched terms in <em>...</em> for every searched sub-field.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<em>");
    highlightBuilder.postTags("</em>");
    for (String field : searchFields) {
        highlightBuilder.field(field);
    }

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(boolQueryBuilder);
    searchSourceBuilder.highlighter(highlightBuilder);

    // Use the entity's index-name constant instead of repeating the literal.
    SearchRequest searchRequest = new SearchRequest(FileEsInfo.INDEX_NAME);
    searchRequest.source(searchSourceBuilder);

    // Execute the search.
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Convert each hit's _source to the entity, then apply the highlight fragments.
    SearchHit[] hits = searchResponse.getHits().getHits();
    List<FileEsInfo> list = new ArrayList<>(hits.length);
    for (SearchHit hit : hits) {
        FileEsInfo fileEsInfo = objectMapper.convertValue(hit.getSourceAsMap(), FileEsInfo.class);
        list.add(handlerHighlight(fileEsInfo, hit));
    }
    return list;
}

/**
 * Applies highlight output from a search hit to the given document.
 *
 * <p>The name is replaced by the concatenation of all its highlight fragments, so the
 * full title is shown. The content is replaced by the first highlight fragment; if the
 * content has no highlight, it is truncated to its first 30 characters. For both name
 * and content, the pinyin sub-field takes precedence over the synonym sub-field.
 *
 * @param fileEsInfo the document built from the hit's source; modified in place
 * @param hit the search hit carrying the highlight fields
 * @return the same {@code fileEsInfo} instance
 */
private FileEsInfo handlerHighlight(FileEsInfo fileEsInfo, SearchHit hit) {
    final int previewLength = 30;

    // Name: show every fragment; pinyin highlight wins over synonym highlight.
    String highlightedName = joinFragments(highlightFragments(hit, "name.pinyin"));
    if (highlightedName == null) {
        highlightedName = joinFragments(highlightFragments(hit, "name.synonymy"));
    }
    if (highlightedName != null) {
        fileEsInfo.setName(highlightedName);
    }

    // Content: show only the first fragment; pinyin highlight wins over synonym highlight.
    Text[] contentFragments = highlightFragments(hit, "fileContent.pinyin");
    if (contentFragments == null) {
        contentFragments = highlightFragments(hit, "fileContent.synonymy");
    }
    if (contentFragments != null) {
        fileEsInfo.setFileContent(contentFragments[0].toString());
        return fileEsInfo;
    }

    // No content highlight: fall back to a short preview. A document without content
    // keeps its null value instead of triggering a NullPointerException.
    String fileContent = fileEsInfo.getFileContent();
    if (fileContent != null) {
        fileEsInfo.setFileContent(
                fileContent.substring(0, Math.min(fileContent.length(), previewLength)));
    }
    return fileEsInfo;
}

/** Returns the highlight fragments of a field, or null when the field has none. */
private Text[] highlightFragments(SearchHit hit, String fieldName) {
    if (hit.getHighlightFields() == null) {
        return null;
    }
    HighlightField field = hit.getHighlightFields().get(fieldName);
    if (field == null) {
        return null;
    }
    Text[] fragments = field.getFragments();
    return (fragments == null || fragments.length == 0) ? null : fragments;
}

/** Concatenates fragments in order, or returns null when there are none. */
private String joinFragments(Text[] fragments) {
    if (fragments == null) {
        return null;
    }
    StringBuilder joined = new StringBuilder();
    for (Text fragment : fragments) {
        joined.append(fragment.toString());
    }
    return joined.toString();
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值