基于新闻的高亮搜索
1. 自定义analyzer
# 自定义分词器, news_analyzer是自定义的分词器的名字
PUT news
{
"settings": {
"analysis": {
"analyzer": {
"news_analyzer" : {
"char_filter": ["html_strip"],
"tokenizer": "keyword",
"filter": [
"my_news_filter"
]
},
"news_search_analyzer": {
"char_filter": ["html_strip"],
"tokenizer": "keyword",
"filter": ["lowercase"]
}
},
"filter": {
"my_news_filter": {
"type": "pinyin",
"keep_first_letter": true,
"keep_full_pinyin": false,
"keep_none_chinese": false,
"keep_separate_first_letter": false,
"keep_joined_full_pinyin": true,
"keep_none_chinese_in_joined_full_pinyin": true,
"none_chinese_pinyin_tokenize": false,
"limit_first_letter_length": 16,
"keep_original": true
}
}
}
}
}
2. 定义mappings
PUT news/_mapping
{
"properties": {
"id": {
"type": "long"
},
"title": {
"type": "text",
"analyzer": "hanlp_index"
},
"url": {
"type": "keyword"
},
"content": {
"type": "text",
"analyzer": "hanlp_index"
},
"tags": {
"type": "completion",
"analyzer": "news_analyzer",
"search_analyzer": "news_search_analyzer"
}
}
}
设置mappings的时候,可以指定 "dynamic": false。它的意思是:如果新增的文档中包含mappings里没有定义的字段,该字段的数据仍然会保存在文档的 _source 中,查询文档时会随 _source 一起返回;但是该字段不会被索引(不会分词),因此不能作为搜索条件使用。
3. 导入mysql的数据集
1.将news.sql导入mysql数据库
2.将mysql驱动包放在D:\elasticsearch\logstash-7.4.2\logstash-core\lib\jars目录下
3.将logstash-mysql-news.conf放在D:\elasticsearch\logstash-7.4.2\config目录下
4.进到logstash的bin目录下,执行:logstash.bat -f D:\elasticsearch\logstash-7.4.2\config\logstash-mysql-news.conf 命令,开始导入数据。
4.编写suggestion与query
搜索要使用的suggestion
GET news/_search
{
"_source": ["id"],
"suggest": {
"tags_suggest": {
"prefix": "中",
"completion": {
"field": "tags",
"skip_duplicates": true,
"size": 10
}
}
}
}
在使用suggestion的时候,"skip_duplicates": true 表示如果出现内容相同的建议,只保留其中一个。
搜索要使用的query
GET news/_search
{
"_source": ["url"],
"query": {
"multi_match": {
"query": "中国赴塞尔维亚抗疫专家",
"fields": ["title", "content"]
}
},
"highlight": {
"post_tags": "</span>",
"pre_tags": "<span>",
"fields": {
"title": {},
"content": {}
}
}
}
5.导入依赖
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
6.编写Elasticsearch配置类(RestClientConfig)
package com.qf.config;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
/**
 * Spring configuration that wires the Elasticsearch REST client and template
 * used by the controllers of this application.
 */
@Configuration
public class RestClientConfig extends AbstractElasticsearchConfiguration {

    /**
     * Builds the high-level REST client, connected to the node at {@code localhost:9200}.
     *
     * @return the client produced by {@link RestClients#create(ClientConfiguration)}
     */
    @Bean
    @Override
    public RestHighLevelClient elasticsearchClient() {
        return RestClients
                .create(ClientConfiguration.builder().connectedTo("localhost:9200").build())
                .rest();
    }

    /**
     * Exposes an {@link ElasticsearchRestTemplate} backed by {@link #elasticsearchClient()}.
     *
     * @return a template wrapping the REST client defined in this class
     */
    @Bean
    public ElasticsearchRestTemplate elasticsearchRestTemplate() {
        RestHighLevelClient restClient = elasticsearchClient();
        return new ElasticsearchRestTemplate(restClient);
    }
}
7.POJO类的编写
package com.qf.pojo;
import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
/**
 * Document class bound to the {@code news} index through {@link Document}.
 * Getters, setters, equals/hashCode and toString are generated by Lombok's {@code @Data}.
 */
@Document(indexName = "news")
@Data
public class News {
// NOTE(review): the index mapping shown earlier in this tutorial declares "id" as long,
// while this field is an Integer - confirm that all id values fit into 32 bits.
private Integer id;
// Link of the news article; mapped as a keyword field in the index mapping shown earlier.
private String url;
// Headline; the search controller overwrites it with the highlighted fragment when one exists.
private String title;
// Article body; the search controller overwrites it with the highlighted fragment when one exists.
private String content;
// Mapped as a completion field in the index mapping shown earlier; queried by the suggest controller.
private String tags;
}
8. 编写NewsSuggestController
package com.qf.controller;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
// News search suggestions (auto-completion)
@RestController
@RequestMapping("news-suggest")
public class NewsSuggestController {

    /** Name under which the completion suggester is registered in the request and read from the response. */
    private static final String SUGGESTION_NAME = "suggest-news";

    /** Completion field of the {@code news} index that the suggester queries. */
    private static final String SUGGEST_FIELD = "tags";

    /** Maximum number of suggestions requested from Elasticsearch. */
    private static final int MAX_SUGGESTIONS = 10;

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Returns completion suggestions from the {@code tags} field of the {@code news} index
     * for the given prefix.
     *
     * @param text prefix typed by the user; may be {@code null} or blank
     * @return the suggested texts in the order Elasticsearch returned them, without duplicates;
     *         empty when {@code text} is {@code null}/blank or the response carries no suggestions
     */
    @GetMapping
    public Set<String> movieSuggest(String text) {
        // LinkedHashSet keeps the order in which the options were returned.
        Set<String> suggestionResult = new LinkedHashSet<>();

        // A missing request parameter arrives as null; there is nothing to complete,
        // so answer with an empty result instead of passing null on to the builder.
        if (text == null || text.trim().isEmpty()) {
            return suggestionResult;
        }

        // Step 1: build the completion suggester.
        CompletionSuggestionBuilder tagsPrefixSuggest = new CompletionSuggestionBuilder(SUGGEST_FIELD)
                .prefix(text)
                .size(MAX_SUGGESTIONS)   // number of suggestions to return
                .skipDuplicates(true);   // collapse identical suggestions

        // Step 2: wrap it in a SuggestBuilder, which carries all suggesters of the request.
        SuggestBuilder suggestBuilder = new SuggestBuilder()
                .addSuggestion(SUGGESTION_NAME, tagsPrefixSuggest);

        SearchResponse response =
                elasticsearchRestTemplate.suggest(suggestBuilder, IndexCoordinates.of("news"));

        // The suggest section may be missing from the response.
        Suggest suggest = response.getSuggest();
        if (suggest == null) {
            return suggestionResult;
        }

        // Only a completion suggester is registered under SUGGESTION_NAME above,
        // so the result is read through the typed CompletionSuggestion API.
        CompletionSuggestion suggestion = suggest.getSuggestion(SUGGESTION_NAME);
        if (suggestion == null) {
            return suggestionResult;
        }

        for (CompletionSuggestion.Entry entry : suggestion.getEntries()) {
            for (CompletionSuggestion.Entry.Option option : entry.getOptions()) {
                Text optionText = option.getText();
                suggestionResult.add(optionText.toString());
            }
        }
        return suggestionResult;
    }
}
9.编写NewsSearchController
package com.qf.controller;
import com.qf.pojo.News;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
// News full-text search with highlighted fragments for the front end
@RestController
@RequestMapping("/news")
public class NewsSearchController {

    private static final String TITLE_FIELD = "title";
    private static final String CONTENT_FIELD = "content";

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Searches the {@code title} and {@code content} fields for {@code searchText} and returns
     * the matching documents, with title/content replaced by their first highlighted fragment
     * whenever Elasticsearch produced one.
     *
     * <p>The query built here corresponds to this request:
     * <pre>{@code
     * GET news/_search
     * {
     *   "query": {
     *     "multi_match": {
     *       "query": "中国",
     *       "fields": ["title", "content"]
     *     }
     *   },
     *   "highlight": {
     *     "pre_tags": "<font color='red'>",
     *     "post_tags": "</font>",
     *     "fields": {
     *       "title": {},
     *       "content": {}
     *     }
     *   }
     * }
     * }</pre>
     *
     * @param searchText text to search for; may be {@code null} or blank
     * @return the matching news documents; empty when {@code searchText} is {@code null}/blank
     *         or nothing matches
     */
    @GetMapping("/search")
    public List<News> searchNews(String searchText) {
        List<News> news = new ArrayList<>();

        // A missing request parameter arrives as null; there is nothing to search for,
        // so answer with an empty result instead of passing null on to the query builder.
        if (searchText == null || searchText.trim().isEmpty()) {
            return news;
        }

        MultiMatchQueryBuilder multiMatchQueryBuilder =
                new MultiMatchQueryBuilder(searchText, TITLE_FIELD, CONTENT_FIELD);
        HighlightBuilder highlightBuilder = new HighlightBuilder()
                .preTags("<font color='red'>")
                .postTags("</font>")
                .field(TITLE_FIELD)
                .field(CONTENT_FIELD);
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(multiMatchQueryBuilder)
                .withHighlightBuilder(highlightBuilder)
                .build();

        SearchHits<News> search = elasticsearchRestTemplate.search(query, News.class);

        for (SearchHit<News> searchHit : search.getSearchHits()) {
            News item = searchHit.getContent();
            Map<String, List<String>> highlightFields = searchHit.getHighlightFields();
            // Prefer the highlighted fragment; keep the stored value when the field has none.
            item.setTitle(firstFragmentOrDefault(highlightFields, TITLE_FIELD, item.getTitle()));
            item.setContent(firstFragmentOrDefault(highlightFields, CONTENT_FIELD, item.getContent()));
            news.add(item);
        }
        return news;
    }

    /** Returns the first highlight fragment of {@code field}, or {@code fallback} when there is none. */
    private static String firstFragmentOrDefault(Map<String, List<String>> highlightFields,
                                                 String field,
                                                 String fallback) {
        List<String> fragments = highlightFields.get(field);
        if (fragments == null || fragments.isEmpty()) {
            return fallback;
        }
        return fragments.get(0);
    }
}
10.前端的实现
把课件中的静态资源文件拷贝到static目录下即可
11.页面效果
![页面效果](images/success.png)
searchHit.getContent().setContent(highLightFields.get("content") == null ? searchHit.getContent().getContent() : highLightFields.get("content").get(0));
// 放到实体类中
news.add(searchHit.getContent());
}
return news;
}
}
#### 10.前端的实现
>把课件中的静态资源文件拷贝到static目录下即可
#### 11.页面效果
![页面效果](images/success.png)