ElasticSearch中实现模糊查询效果(类似数据库中like功能)
场景:
业务要求提供一个es查询功能,实现类似模糊查询效果,并且命中字段显示红色。举例说明:
es中字段内容 | 输入(即关键字) | 是否输出 |
---|---|---|
你好,中国,强大的祖国 | 中国 | 是(则‘中国’两个字飘红) |
你好,中国,强大的祖国 | 俄国 | 否 |
你好,中国,强大的祖国 | 最大 | 否 |
实现方式:
这种实现方式主要是用es的query_string查询方式,不过需要对输入条件做区分处理才能实现模糊查询效果。
首先,复习一下query_string查询方式的基本用法(不指定字段时,会对所有可查询字段进行检索):
{
"query": {
"query_string": {
"query": "中国"
}
},
"size": 10,
"from": 0,
"sort": []
}
pom依赖:
我用的elasticsearch版本是6.8.1,springboot版本是2.3.5.RELEASE。spring-boot-starter-data-elasticsearch默认引入的spring-data-elasticsearch版本与es 6.8.1不匹配,因此需要先排除默认的包,再引入与之匹配的版本(3.2.6.RELEASE)。
<!-- Spring Boot starter for Elasticsearch; the spring-data-elasticsearch version it manages is excluded here -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
<exclusions>
<!-- Drop the transitively managed spring-data-elasticsearch so the explicit version below is used instead -->
<exclusion>
<artifactId>spring-data-elasticsearch</artifactId>
<groupId>org.springframework.data</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- Explicitly pinned spring-data-elasticsearch version (the article targets Elasticsearch 6.8.1 with Spring Boot 2.3.5.RELEASE) -->
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
<version>3.2.6.RELEASE</version>
</dependency>
代码:
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Core Elasticsearch service that emulates a SQL {@code LIKE}-style search on top of
 * the {@code query_string} query and highlights the matched text in red.
 *
 * <p>The user keyword may contain one kind of upper-case logical operator
 * ({@code AND}, {@code OR} or {@code NOT}, each surrounded by single spaces). The keyword
 * is split on that operator and every operand becomes its own {@code query_string}
 * clause inside a {@code bool} query; see {@link #queryString(String)}.
 *
 * @author lukou
 * @date 2023/05/15
 */
@Service
public class ElasticSearchCoreService {

    /** Operator token (with surrounding spaces) that joins operands which must all match. */
    private static final String CONSTANT_AND = " AND ";

    /** Operator token (with surrounding spaces) that introduces an operand which must not match. */
    private static final String CONSTANT_NOT = " NOT ";

    /** Operator token (with surrounding spaces) that joins alternative operands. */
    private static final String CONSTANT_OR = " OR ";

    /** Prefix of a keyword whose very first operand is negated, e.g. {@code "NOT 123 NOT abc"}. */
    private static final String LEADING_NOT = "NOT ";

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Counts the documents in {@code index} that match {@code keyword}.
     *
     * @param index   name of the index to query
     * @param keyword user keyword, interpreted by {@link #queryString(String)}
     * @return the raw {@link CountResponse} returned by the client
     * @throws IOException              if the request to Elasticsearch fails
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     */
    public CountResponse queryCount(String index, String keyword) throws IOException {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(this.queryString(keyword));
        CountRequest request = new CountRequest(index);
        request.source(searchSourceBuilder);
        return restHighLevelClient.count(request, RequestOptions.DEFAULT);
    }

    /**
     * Runs a paged, highlighted search for {@code keyword} against {@code index}.
     *
     * @param index   name of the index to query
     * @param keyword user keyword, interpreted by {@link #queryString(String)}
     * @param from    offset of the first hit to return
     * @param size    maximum number of hits to return
     * @return the raw {@link SearchResponse} returned by the client
     * @throws IOException              if the request to Elasticsearch fails
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     */
    public SearchResponse querySearch(String index, String keyword, int from, int size) throws IOException {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(this.queryString(keyword))
                .highlighter(redHighlightBuilder())
                .from(from)
                .size(size);
        SearchRequest request = new SearchRequest(index);
        // The preference string is derived from the keyword so that every page of the same
        // search uses the same preference value; the original author added this to fix
        // inaccurate paging results (shard related).
        request.source(searchSourceBuilder).preference(String.valueOf(keyword.hashCode()));
        return restHighLevelClient.search(request, RequestOptions.DEFAULT);
    }

    /**
     * Builds the {@code bool} query for a keyword, splitting it on a logical operator.
     *
     * <p>Supported shapes:
     * <pre>
     * "123 AND abc"      every operand becomes a must clause
     * "123 OR abc"       every operand becomes a should clause
     * "123 NOT abc"      first operand is a must clause, the rest are must_not clauses
     * "NOT 123 NOT abc"  every operand becomes a must_not clause
     * </pre>
     *
     * <p>Only one operator kind is honoured per keyword, checked in the order AND, OR, NOT.
     * When several kinds are mixed, the keyword is split on the first kind found and the
     * remaining operator text stays inside the operand handed to {@link #convert(String)}.
     * A keyword without any operator becomes a single must clause.
     *
     * @param key the user keyword; must not be {@code null}
     * @return a {@link BoolQueryBuilder} holding one {@code query_string} clause per operand
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public QueryBuilder queryString(String key) {
        if (key == null) {
            // Fail fast with a clear message instead of an NPE deep inside convert().
            throw new IllegalArgumentException("key must not be null");
        }
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        if (StringUtils.contains(key, CONSTANT_AND)) {
            for (String operand : key.split(CONSTANT_AND)) {
                boolQueryBuilder.must(QueryBuilders.queryStringQuery(convert(operand)));
            }
            return boolQueryBuilder;
        }
        if (StringUtils.contains(key, CONSTANT_OR)) {
            for (String operand : key.split(CONSTANT_OR)) {
                boolQueryBuilder.should(QueryBuilders.queryStringQuery(convert(operand)));
            }
            return boolQueryBuilder;
        }
        if (StringUtils.contains(key, CONSTANT_NOT)) {
            List<String> operands = new ArrayList<>(Arrays.asList(key.split(CONSTANT_NOT)));
            String first = operands.get(0);
            if (first.startsWith(LEADING_NOT)) {
                // "NOT a NOT b": strip the leading operator so that the first operand is "a".
                // (The previous code tried to remove "Not ", which never matched the upper-case
                // prefix, so the literal text "NOT a" was excluded instead of "a".)
                operands.set(0, first.substring(LEADING_NOT.length()));
                for (String operand : operands) {
                    boolQueryBuilder.mustNot(QueryBuilders.queryStringQuery(convert(operand)));
                }
                return boolQueryBuilder;
            }
            // "a NOT b NOT c": the first operand is required, all following ones are excluded.
            String required = operands.remove(0);
            boolQueryBuilder.must(QueryBuilders.queryStringQuery(convert(required)));
            for (String operand : operands) {
                boolQueryBuilder.mustNot(QueryBuilders.queryStringQuery(convert(operand)));
            }
            return boolQueryBuilder;
        }
        boolQueryBuilder.must(QueryBuilders.queryStringQuery(convert(key)));
        return boolQueryBuilder;
    }

    /**
     * Turns a single operand into {@code query_string} syntax.
     *
     * <ul>
     *   <li>Only ASCII letters/digits (including the empty string): wrapped in wildcards,
     *       i.e. {@code *key*}.</li>
     *   <li>Exactly one character in the range U+4E00..U+9FA5 followed by zero or more ASCII
     *       letters/digits: a trailing wildcard is appended, i.e. {@code key*}.</li>
     *   <li>Anything else: wrapped in double quotes so it is searched as one phrase instead
     *       of as independent terms.</li>
     * </ul>
     *
     * <p>NOTE(review): the second pattern accepts only a single leading CJK character; it is
     * unclear whether a combined character class was intended. Behaviour is kept as is -
     * confirm with the author before changing it.
     *
     * <p>NOTE(review): the operand is not escaped, so reserved {@code query_string}
     * characters (for example a double quote) typed by the user reach the query unchanged.
     *
     * @param key a single operand; must not be {@code null}
     * @return the operand rewritten as a {@code query_string} expression
     */
    public String convert(String key) {
        // Default: quote the operand so it is matched as a phrase.
        String res = "\"" + key + "\"";
        if (key.matches("^[A-Za-z0-9]*$")) {
            res = "*" + key + "*";
        }
        if (key.matches("^[\u4e00-\u9fa5][A-Za-z0-9]*$")) {
            res = key + "*";
        }
        return res;
    }

    /**
     * Creates the highlighter that wraps matched text in a red {@code <span>}.
     *
     * @return a {@link HighlightBuilder} covering all fields
     */
    public HighlightBuilder redHighlightBuilder() {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        // Highlight every field.
        highlightBuilder.field("*");
        // Only highlight fields that the query actually matched.
        highlightBuilder.requireFieldMatch(true);
        // buildHighlightTags() replaces the whole source value with the highlighted text, so
        // ask for the complete field content (0 fragments) rather than a truncated snippet.
        highlightBuilder.numOfFragments(0);
        // Tags placed before and after each match.
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        return highlightBuilder;
    }

    /**
     * Converts search hits into source maps in which every highlighted field's value is
     * replaced by its highlighted text.
     *
     * <p>The map returned by each hit's {@code getSourceAsMap()} is modified in place. Fields
     * without any highlight fragment keep their original value.
     *
     * @param hits the hits of a search response; {@code null} yields an empty list
     * @return one source map per hit, in hit order
     */
    public List<Map<String, Object>> buildHighlightTags(SearchHit[] hits) {
        if (hits == null) {
            return new ArrayList<>();
        }
        List<Map<String, Object>> result = new ArrayList<>(hits.length);
        for (SearchHit hit : hits) {
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            for (Map.Entry<String, HighlightField> entry : highlightFields.entrySet()) {
                String fieldName = entry.getKey();
                HighlightField highlightField = entry.getValue();
                if (fieldName == null || highlightField == null) {
                    continue;
                }
                Object[] fragments = highlightField.getFragments();
                if (fragments == null || fragments.length == 0) {
                    // Nothing to substitute; keep the original source value.
                    continue;
                }
                // Replace the original content with the highlighted version.
                sourceAsMap.put(fieldName, fragments[0].toString());
            }
            result.add(sourceAsMap);
        }
        return result;
    }
}
测试:
import org.elasticsearch.action.search.SearchResponse;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Demo endpoint that exercises {@link ElasticSearchCoreService}: it runs a highlighted
 * search and returns the total hit count together with the first page of results.
 */
@RestController
public class TestController {

    /** Offset of the first hit requested by the demo endpoint. */
    private static final int FIRST_HIT = 0;

    /** Number of hits requested by the demo endpoint. */
    private static final int PAGE_SIZE = 10;

    @Resource
    private ElasticSearchCoreService elasticSearchCoreService;

    /**
     * Searches {@code index} for {@code keyword} and returns the first ten hits.
     *
     * @param index   name of the index to search
     * @param keyword keyword handed to the search service
     * @return a map with {@code "total"} (hit count) and {@code "data"} (source maps with
     *         highlighted values)
     * @throws IOException if the search request fails
     */
    @GetMapping("/test")
    public Map<String, Object> test(String index, String keyword) throws IOException {
        final SearchResponse response =
                elasticSearchCoreService.querySearch(index, keyword, FIRST_HIT, PAGE_SIZE);
        final long hitCount = response.getHits().getTotalHits();
        final List<Map<String, Object>> rows =
                elasticSearchCoreService.buildHighlightTags(response.getHits().getHits());

        final Map<String, Object> body = new HashMap<>();
        body.put("total", hitCount);
        body.put("data", rows);
        return body;
    }
}
造数据:
新建索引tmp_1以及插入5条数据
PUT http://localhost:9200/tmp_1
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"_doc": {
"properties": {
"@timestamp": {
"type": "date"
},
"@version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "keyword"
},
"sfz": {
"type": "text"
},
"content": {
"type": "text"
},
"address": {
"type": "text"
}
}
}
}
}
POST http://localhost:9200/tmp_1/_doc
{
"@version": "1",
"@timestamp": "2020-06-19T09:06:26.446Z",
"name": "唐伯虎",
"sfz": "3212111990018989",
"content": "当你无话可说的时候就别说话,在你不知如何回答别人的话的时候就保持沉默,这就是生活中一个很好的策略",
"address": "江苏省南京市"
}
POST http://localhost:9200/tmp_1/_doc
{
"@version": "1",
"@timestamp": "2020-06-19T09:06:26.446Z",
"name": "唐伯龙",
"sfz": "3212111990018989",
"content": "凡事都有偶然的凑巧,结果却又如宿命的必然",
"address": "江苏省无锡市"
}
POST http://localhost:9200/tmp_1/_doc
{
"@version": "1",
"@timestamp": "2020-06-19T09:06:26.446Z",
"name": "唐小虎",
"sfz": "321211199709227654",
"content": "一个人如果刻意逃避他所惧怕的东西,到头来会发现自己只是抄了近路去见它",
"address": "江苏省苏州市"
}
POST http://localhost:9200/tmp_1/_doc
{
"@version": "1",
"@timestamp": "2020-06-19T09:06:26.446Z",
"name": "李小龙",
"sfz": "1234211186709222348",
"content": "虫子被踩后缩起来,这是明智的,它借此减少重新被踩的概率。用道德的语言就叫:谦恭",
"address": "江苏省常州市"
}
POST http://localhost:9200/tmp_1/_doc
{
"@version": "1",
"@timestamp": "2020-06-19T09:06:26.446Z",
"name": "李四",
"sfz": "436754187709087623",
"content": "你好,1234,你好5678",
"address": "上海市"
}
调用接口:
http://localhost:8081/test?index=tmp_1&keyword=你好
# 响应
{
"total": 1,
"data": [
{
"@timestamp": "2020-06-19T09:06:26.446Z",
"address": "上海市",
"sfz": "436754187709087623",
"@version": "1",
"name": "李四",
"content": "<span style='color:red'>你</span><span style='color:red'>好</span>,1234,<span style='color:red'>你</span><span style='color:red'>好</span>5678"
}
]
}
http://localhost:8081/test?index=tmp_1&keyword=唐小虎
# 响应
{
"total": 1,
"data": [
{
"@timestamp": "2020-06-19T09:06:26.446Z",
"address": "江苏省苏州市",
"sfz": "321211199709227654",
"@version": "1",
"name": "<span style='color:red'>唐小虎</span>",
"content": "一个人如果刻意逃避他所惧怕的东西,到头来会发现自己只是抄了近路去见它"
}
]
}
测试场景没有全面覆盖,如有错误,欢迎指正。