一、安装ik分词器与拼音分词器插件
ik分词器下载地址:https://github.com/infinilabs/analysis-ik/releases
pinyin分词器下载地址:https://github.com/infinilabs/analysis-pinyin/releases
下载与es版本一致的插件,将每个zip包分别解压到es安装目录plugins文件夹下各自的子目录中(例如plugins/ik、plugins/pinyin),然后重启es使插件生效
二、自定义索引映射
1、创建索引映射文件file-mapping.json
{
  // NOTE(review): the // comments are explanatory only; strip them if the JSON loader in use rejects comments - confirm.
  // Only the fields declared below are mapped; undeclared fields are not indexed.
  "dynamic": false,
  "properties": {
    // File name: analyzed with ik_smart, plus pinyin and synonym sub-fields.
    "name": {
      "type": "text",
      "analyzer": "ik_smart",
      "fields": {
        // Sub-field indexed with the custom pinyin analyzer; queries are analyzed with ik_smart.
        "pinyin": {
          "type": "text",
          "analyzer": "pinyin_analyzer",
          "search_analyzer": "ik_smart"
        },
        // Sub-field indexed with the custom synonym analyzer; queries are analyzed with ik_smart.
        "synonymy": {
          "type": "text",
          "analyzer": "synonym_analyzer",
          "search_analyzer": "ik_smart"
        }
      }
    },
    // Identifier: mapped as keyword so the value is kept as one exact-match term
    // instead of being split by a text analyzer.
    "id": {
      "type": "keyword"
    },
    // File content: same analyzer layout as "name".
    "fileContent": {
      "type": "text",
      "analyzer": "ik_smart",
      "fields": {
        "pinyin": {
          "type": "text",
          "analyzer": "pinyin_analyzer",
          "search_analyzer": "ik_smart"
        },
        "synonymy": {
          "type": "text",
          "analyzer": "synonym_analyzer",
          "search_analyzer": "ik_smart"
        }
      }
    }
  }
}
2、创建索引设置文件file-setting.json
{
  // NOTE(review): the // comments are explanatory only; strip them if the JSON loader in use rejects comments - confirm.
  "index": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "analysis": {
    "analyzer": {
      // Custom analyzer: ik_max_word tokens passed through the "py" pinyin token filter.
      "pinyin_analyzer": {
        "tokenizer": "ik_max_word",
        "filter": "py"
      },
      // Custom analyzer: ik_max_word tokens expanded with synonyms, then lowercased.
      "synonym_analyzer": {
        "tokenizer": "ik_max_word",
        "filter": [
          "synonym",
          "lowercase"
        ]
      }
    },
    "filter": {
      "py": {
        "type": "pinyin", // pinyin token filter: emits pinyin forms of Chinese tokens
        "keep_full_pinyin": true, // emit the pinyin of each character, e.g. liu, de, hua
        "keep_joined_full_pinyin": true, // emit the joined full pinyin, e.g. liudehua
        "keep_first_letter": true, // emit the joined first letters, e.g. ldh
        "keep_separate_first_letter": false, // do not emit each first letter as its own term (l, d, h)
        "keep_original": true, // also keep the original (Chinese) token
        "remove_duplicated_term": true, // drop duplicated terms from the output
        "none_chinese_pinyin_tokenize": true, // split non-Chinese letter runs into separate pinyin terms when they form pinyin
        "lowercase": true // lowercase non-Chinese letters
      },
      "synonym": {
        // "synonym" rather than "synonym_graph": this filter runs in an index-time analyzer
        // (the mapping sets search_analyzer to ik_smart), and synonym_graph is documented
        // for use in search analyzers only.
        "type": "synonym",
        "synonyms_path": "analysis/synonyms.txt" // synonyms file, relative to the ES config directory
      }
    }
  }
}
3、在es安装目录的config文件夹下创建同义词文本文件analysis/synonyms.txt(每行一组同义词,词与词之间用英文逗号分隔)
按照,依照,遵照,遵守,服从,根据,依据,遵从,遵循
依照,按照,遵照,遵守,根据,依据,遵从,遵循
遵照,按照,依照,遵守,恪守,服从,根据,坚守,遵从,遵循,奉命,遵命,听命,从命
安置,安排,安顿,部署,布置,计划,安装,安放,安设
安排,安置,安顿,部署,布置,计划,调节,左右,调整,操纵,摆布,支配,摆设,陈设,打算,调度,调理,放置,铺排,设计,调动,策画,张罗
安顿,安置,安排,部署,布置,计划,放置,安插,安放
暴发,爆发
爆发,暴发,迸发,产生,发生,发作,爆出
变换,变幻,变更,调换,改变,改动,改换,转换
变幻,变换,幻化
辩别,辨别,辨认,鉴别
4、创建es索引实体类
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;
import org.springframework.data.elasticsearch.annotations.Setting;
import java.util.List;
/**
 * Elasticsearch document describing an indexed file.
 *
 * <p>Index settings and field mappings are loaded from the classpath resources
 * {@code /es/file-setting.json} and {@code /es/file-mapping.json}; the resource
 * names must match the files created for this index.
 */
@Document(indexName = FileEsInfo.INDEX_NAME)
@Data
@Accessors(chain = true)
@Setting(settingPath = "/es/file-setting.json")
@Mapping(mappingPath = "/es/file-mapping.json")
public class FileEsInfo {

    /** Name of the Elasticsearch index that stores these documents. */
    public static final String INDEX_NAME = "file_content";

    /** Document identifier. */
    @ApiModelProperty("编号")
    @Id
    private String id;

    /** File name. */
    @ApiModelProperty("文件名称")
    private String name;

    /** Text content of the file. */
    @ApiModelProperty("文件内容")
    private String fileContent;
}
三、测试es构建分析器检索效果
(一)测试拼音分词器效果
1、测试拼音检索
2、测试汉字检索
3、测试拼音汉字混合检索
(二)测试同义词分词器效果
四、SpringBoot集成es实现搜索
(一)添加依赖
<!-- Elasticsearch High Level REST Client; provides the RestHighLevelClient used by the search code. -->
<!-- NOTE(review): keep this version aligned with the Elasticsearch server version - confirm. -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.17.16</version>
</dependency>
(二) 实现搜索并添加高亮
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import com.fasterxml.jackson.databind.ObjectMapper;
/** Elasticsearch client used to execute the search request. */
@Resource
private RestHighLevelClient restHighLevelClient;
/** Jackson mapper used to convert a hit's source map into a {@code FileEsInfo}. */
@Resource
private ObjectMapper objectMapper;
/**
 * Searches the file index by keyword and returns the hits with highlighted name/content.
 *
 * <p>The keyword is matched against the synonym and pinyin sub-fields of both
 * {@code name} and {@code fileContent}; a document qualifies when at least one of
 * the four clauses matches. When the keyword is empty no clause is added and the
 * bool query is sent without clauses (Elasticsearch treats that as match-all).
 *
 * @param keyword search text; may be empty or {@code null}
 * @return the matching documents, post-processed by {@code handlerHighlight}
 * @throws IOException if executing the search request fails
 */
@Override
public List<FileEsInfo> getFileInfoList(String keyword) throws IOException {
    // Build the bool query: any one of the synonym / pinyin sub-fields may match.
    BoolQueryBuilder query = QueryBuilders.boolQuery();
    if (StringUtils.isNotEmpty(keyword)) {
        // Synonym-analyzed sub-fields
        query.should(QueryBuilders.matchQuery("name.synonymy", keyword));
        query.should(QueryBuilders.matchQuery("fileContent.synonymy", keyword));
        // Pinyin-analyzed sub-fields
        query.should(QueryBuilders.matchQuery("name.pinyin", keyword));
        query.should(QueryBuilders.matchQuery("fileContent.pinyin", keyword));
        query.minimumShouldMatch(1);
    }

    // Highlight every sub-field that is queried, wrapping matches in <em> tags.
    HighlightBuilder highlighter = new HighlightBuilder();
    highlighter.preTags("<em>");
    highlighter.postTags("</em>");
    highlighter.field("fileContent.synonymy");
    highlighter.field("fileContent.pinyin");
    highlighter.field("name.synonymy");
    highlighter.field("name.pinyin");

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.query(query);
    source.highlighter(highlighter);

    // Use the entity's constant so the index name is defined in exactly one place.
    SearchRequest request = new SearchRequest(FileEsInfo.INDEX_NAME);
    request.source(source);

    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

    // Convert each hit's source into the entity and apply the highlight fragments.
    List<FileEsInfo> results = new ArrayList<>();
    for (SearchHit hit : response.getHits().getHits()) {
        FileEsInfo info = objectMapper.convertValue(hit.getSourceAsMap(), FileEsInfo.class);
        results.add(handlerHighlight(info, hit));
    }
    return results;
}
/** Number of leading characters of the raw content shown when no content highlight exists. */
private static final int CONTENT_PREVIEW_LENGTH = 30;

/**
 * Applies the hit's highlight fragments to the entity.
 *
 * <p>The name is replaced by the concatenation of all its highlight fragments, so it
 * is shown in full. The content is replaced by the first highlight fragment, or, when
 * there is no content highlight, by the first {@value #CONTENT_PREVIEW_LENGTH}
 * characters of the original content. For both fields the pinyin highlight takes
 * precedence over the synonym highlight when both are present.
 *
 * @param fileEsInfo entity built from the hit's source; modified in place
 * @param hit        search hit carrying the highlight fields
 * @return the same {@code fileEsInfo} instance
 */
private FileEsInfo handlerHighlight(FileEsInfo fileEsInfo, SearchHit hit) {
    // Content: later fields override earlier ones, so pinyin wins over synonym.
    String snippet = null;
    for (String field : new String[] {"fileContent.synonymy", "fileContent.pinyin"}) {
        Text[] fragments = highlightFragments(hit, field);
        if (fragments.length > 0) {
            snippet = fragments[0].toString();
        }
    }

    // Name: join every fragment so the whole (highlighted) name is shown.
    for (String field : new String[] {"name.synonymy", "name.pinyin"}) {
        Text[] fragments = highlightFragments(hit, field);
        if (fragments.length > 0) {
            StringBuilder highlightedName = new StringBuilder();
            for (Text fragment : fragments) {
                highlightedName.append(fragment.toString());
            }
            fileEsInfo.setName(highlightedName.toString());
        }
    }

    // No content highlight: fall back to a short prefix of the raw content.
    // A document without content keeps a null content instead of failing.
    if (snippet == null) {
        String content = fileEsInfo.getFileContent();
        if (content != null) {
            snippet = content.substring(0, Math.min(content.length(), CONTENT_PREVIEW_LENGTH));
        }
    }
    fileEsInfo.setFileContent(snippet);
    return fileEsInfo;
}

/** Returns the highlight fragments of {@code field} for the hit, or an empty array when there are none. */
private static Text[] highlightFragments(SearchHit hit, String field) {
    if (hit.getHighlightFields() == null) {
        return new Text[0];
    }
    HighlightField highlight = hit.getHighlightFields().get(field);
    if (highlight == null || highlight.getFragments() == null) {
        return new Text[0];
    }
    return highlight.getFragments();
}