文章目录
基于新闻的高亮搜索
1. 自定义analyzer
# 自定义分词器, news_analyzer是自定义的分词器的名字
PUT news
{
"settings": {
"analysis": {
"analyzer": {
"news_analyzer" : {
"char_filter": ["html_strip"],
"tokenizer": "keyword",
"filter": [
"my_lexicon_filter"
]
},
"news_search_analyzer": {
"char_filter": ["html_strip"],
"tokenizer": "keyword",
"filter": ["lowercase"]
}
},
"filter": {
"my_lexicon_filter": {
"type": "pinyin",
"keep_first_letter": true,
"keep_full_pinyin": false,
"keep_none_chinese": false,
"keep_separate_first_letter": false,
"keep_joined_full_pinyin": true,
"keep_none_chinese_in_joined_full_pinyin": true,
"none_chinese_pinyin_tokenize": false,
"limit_first_letter_length": 16,
"keep_original": true
}
}
}
}
}
2. 定义mappings
PUT news/_mapping
{
"properties": {
"id": {
"type": "long"
},
"title": {
"type": "text",
"analyzer": "hanlp_index"
},
"url": {
"type": "keyword"
},
"content": {
"type": "text",
"analyzer": "hanlp_index"
},
"tags": {
"type": "completion",
"analyzer": "news_analyzer",
"search_analyzer": "news_search_analyzer"
}
}
}
设置mappings的时候,可以指定 "dynamic": false,意思是如果新增数据中包含mappings里没有定义的字段,该字段的值仍会保存在文档的 _source 中,并随查询结果一起返回,但它不会被索引(也不会进行分词),因此不能按该字段进行搜索。
3. 导入mysql的数据集
1.将news.sql导入mysql数据库
2.将mysql驱动包放在D:\elasticsearch\logstash-7.4.2\logstash-core\lib\jars目录下
3.将logstash-mysql-news.conf放在D:\elasticsearch\logstash-7.4.2\config目录下
4.进到logstash的bin目录下,执行:logstash.bat -f D:\elasticsearch\logstash-7.4.2\config\logstash-mysql-news.conf命令,开始导入数据。
4.编写suggestion与query
搜索要使用的suggestion
GET news/_search
{
"_source": ["id"],
"suggest": {
"tags_suggest": {
"prefix": "中",
"completion": {
"field": "tags",
"skip_duplicates": true,
"size": 10
}
}
}
}
**注:** 在使用suggestion的时候,"skip_duplicates": true 表示如果出现相同的建议,那么只会保留一个。
搜索要使用的query
GET news/_search
{
"_source": ["url"],
"query": {
"multi_match": {
"query": "中国赴塞尔维亚抗疫专家",
"fields": ["title", "content"]
}
},
"highlight": {
"post_tags": "</span>",
"pre_tags": "<span>",
"fields": {
"title": {},
"content": {}
}
}
}
5.依赖
<dependencies>
    <!-- Spring MVC / embedded web server -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Spring Boot starter for Spring Data Elasticsearch (version managed by the Boot parent) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
    <!-- 1.2.83 is the 1.x release that closes the known deserialization vulnerabilities of earlier 1.2.x versions -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.83</version>
    </dependency>
</dependencies>
6.编写ElasticsearchConfig
package com.qf.config;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.convert.support.DefaultConversionService;
import org.springframework.data.elasticsearch.config.ElasticsearchConfigurationSupport;
import org.springframework.data.elasticsearch.core.ElasticsearchEntityMapper;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.data.elasticsearch.core.EntityMapper;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
 * Spring configuration that exposes a transport-client based Elasticsearch
 * {@link Client}, the {@link ElasticsearchTemplate} built on it, and the
 * {@link EntityMapper} used by that template.
 */
@Configuration
public class ElasticsearchConfig extends ElasticsearchConfigurationSupport {

    /**
     * Builds a transport client for the cluster named {@code elasticsearch}
     * and registers the node at 127.0.0.1:9300 with it.
     *
     * @return the transport client
     * @throws UnknownHostException if the node address cannot be resolved
     */
    @Bean
    public Client elasticsearchClient() throws UnknownHostException {
        Settings clusterSettings = Settings.builder()
                .put("cluster.name", "elasticsearch")
                .build();
        TransportClient transport = new PreBuiltTransportClient(clusterSettings);
        TransportAddress node = new TransportAddress(InetAddress.getByName("127.0.0.1"), 9300);
        transport.addTransportAddress(node);
        return transport;
    }

    /**
     * Creates the template, registered under both the {@code elasticsearchOperations}
     * and {@code elasticsearchTemplate} bean names.
     *
     * @return a template backed by {@link #elasticsearchClient()} and {@link #entityMapper()}
     * @throws UnknownHostException if the client cannot resolve its node address
     */
    @Bean(name = {"elasticsearchOperations", "elasticsearchTemplate"})
    public ElasticsearchTemplate elasticsearchTemplate() throws UnknownHostException {
        Client client = elasticsearchClient();
        EntityMapper mapper = entityMapper();
        return new ElasticsearchTemplate(client, mapper);
    }

    /**
     * Uses the {@link ElasticsearchEntityMapper} with the mapping context and
     * custom conversions supplied by the parent configuration.
     *
     * @return the entity mapper
     */
    @Bean
    @Override
    public EntityMapper entityMapper() {
        ElasticsearchEntityMapper mapper = new ElasticsearchEntityMapper(
                elasticsearchMappingContext(), new DefaultConversionService());
        mapper.setConversions(elasticsearchCustomConversions());
        return mapper;
    }
}
7.POJO类的编写
package com.qf.entity;
import org.springframework.data.elasticsearch.annotations.Document;
/**
 * A news document stored in the {@code news} index (type {@code _doc}).
 * Plain mutable bean with an id, a url, a title and a content.
 */
@Document(indexName = "news", type = "_doc")
public class News {

    /** Identifier of the news entry. */
    private Integer id;

    /** Link to the news entry. */
    private String url;

    /** Title text. */
    private String title;

    /** Body text. */
    private String content;

    /** @return the identifier, or {@code null} if not set */
    public Integer getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the url, or {@code null} if not set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the title, or {@code null} if not set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content, or {@code null} if not set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }
}
8. 编写NewsTipController
package com.qf.controller;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
// 新闻提示搜索
/**
 * REST endpoint ({@code GET /tip}) that returns search suggestions for news,
 * using a completion suggestion on the {@code tags} field of the {@code news} index.
 */
@RestController
@RequestMapping("/tip")
public class NewsTipController {

    /** Name used to register the suggestion in the request and to read it back from the response. */
    private static final String SUGGESTION_NAME = "tag_prefix_suggestion";

    private final ElasticsearchTemplate elasticsearchTemplate;

    public NewsTipController(@Qualifier("elasticsearchTemplate")
                             ElasticsearchTemplate elasticsearchTemplate) {
        this.elasticsearchTemplate = elasticsearchTemplate;
    }

    /**
     * Returns up to 10 distinct suggestions whose completion input starts with {@code text}.
     *
     * @param text the prefix typed by the user; may be {@code null} or blank
     * @return the suggested texts in the order Elasticsearch returned them;
     *         empty when {@code text} is missing/blank or nothing matches
     */
    @GetMapping
    public Set<String> movieSuggest(String text) {
        // Insertion-ordered so the ranking of the suggestions survives serialization.
        Set<String> suggestionResult = new LinkedHashSet<>();

        // Without a usable prefix there is nothing to complete; skip the round trip.
        if (text == null || text.trim().isEmpty()) {
            return suggestionResult;
        }

        // Step 1: the completion suggestion itself.
        CompletionSuggestionBuilder tagPrefixSuggest = new CompletionSuggestionBuilder("tags")
                .prefix(text)
                .size(10)               // maximum number of suggestions
                .skipDuplicates(true);  // identical suggestions are returned once

        // Step 2: wrap it in a SuggestBuilder, which carries all suggestions of a request.
        SuggestBuilder suggestBuilder = new SuggestBuilder()
                .addSuggestion(SUGGESTION_NAME, tagPrefixSuggest);

        // Step 3: run the search against the "news" index.
        SearchRequestBuilder searchRequestBuilder = elasticsearchTemplate.getClient()
                .prepareSearch("news")
                .suggest(suggestBuilder);
        SearchResponse response = searchRequestBuilder.get();

        Suggest suggestResult = response.getSuggest();
        if (suggestResult == null) {
            return suggestionResult;
        }
        // The suggestion was built as a completion suggestion, so it comes back as one.
        CompletionSuggestion suggestion = suggestResult.getSuggestion(SUGGESTION_NAME);
        if (suggestion == null) {
            return suggestionResult;
        }

        for (CompletionSuggestion.Entry entry : suggestion.getEntries()) {
            for (CompletionSuggestion.Entry.Option option : entry.getOptions()) {
                Text optionText = option.getText();
                suggestionResult.add(optionText.toString());
            }
        }
        return suggestionResult;
    }
}
9.编写NewsSearchController
package com.qf.controller;
import com.alibaba.fastjson.JSON;
import com.qf.entity.News;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.search.MultiMatchQuery;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.data.elasticsearch.core.ResultsExtractor;
import org.springframework.data.elasticsearch.core.query.FetchSourceFilter;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.data.elasticsearch.core.query.SearchQuery;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.elasticsearch.common.text.Text;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
// 新闻内容搜索以及前端高亮显示
/**
 * REST endpoint ({@code GET /news/search}) that runs a full-text search over the
 * {@code title} and {@code content} fields of the {@code news} index and returns
 * the hits with the matching parts wrapped in highlight tags.
 */
@RestController
@RequestMapping("/news")
public class NewsSearchController {

    private final ElasticsearchTemplate elasticsearchTemplate;

    public NewsSearchController(@Qualifier("elasticsearchTemplate")
                                ElasticsearchTemplate elasticsearchTemplate) {
        this.elasticsearchTemplate = elasticsearchTemplate;
    }

    /**
     * Searches news by text. Mirrors this console request, with
     * {@code url}, {@code id} and {@code title} fetched from the source:
     * <pre>
     * GET news/_search
     * {
     *   "query": { "multi_match": { "query": "...", "fields": ["title", "content"] } },
     *   "highlight": {
     *     "pre_tags": "&lt;font color='red'&gt;",
     *     "post_tags": "&lt;/font&gt;",
     *     "fields": { "title": {}, "content": {} }
     *   }
     * }
     * </pre>
     *
     * @param searchText the text to search for; may be {@code null} or blank
     * @return the matching news; title/content hold the highlighted fragments when
     *         the field matched. Empty when {@code searchText} is missing or blank.
     */
    @GetMapping("/search")
    public List<News> searchNews(String searchText) {
        // MultiMatchQueryBuilder rejects a null query value; answer with "no results" instead of failing.
        if (searchText == null || searchText.trim().isEmpty()) {
            return new ArrayList<>();
        }

        MultiMatchQueryBuilder multiMatchQuery = new MultiMatchQueryBuilder(searchText, "title", "content");

        // NOTE(review): fragments are returned as raw HTML and the page renders them with v-html;
        // confirm the indexed text is trusted, or have the highlighter HTML-encode the fragments.
        HighlightBuilder highlightBuilder = new HighlightBuilder()
                .preTags("<font color='red'>")
                .postTags("</font>")
                .field("title")
                .field("content");

        SearchQuery searchQuery = new NativeSearchQueryBuilder()
                .withIndices("news")
                .withQuery(multiMatchQuery)
                .withHighlightBuilder(highlightBuilder)
                .withSourceFilter(new FetchSourceFilter(new String[]{"url", "id", "title"}, new String[]{}))
                .build();

        // query() returns whatever the ResultsExtractor's extract() method returns.
        return elasticsearchTemplate.query(searchQuery, new NewsResultsExtractor());
    }

    /**
     * Converts a search response into a list of {@link News}, replacing title and
     * content with their highlighted fragments where a highlight is present.
     * Static because it uses no state of the enclosing controller.
     */
    static class NewsResultsExtractor implements ResultsExtractor<List<News>> {

        @Override
        public List<News> extract(SearchResponse response) {
            SearchHit[] hits = response.getHits().getHits();
            List<News> newsList = new ArrayList<>(hits.length);
            for (SearchHit hit : hits) {
                // The source holds only the fields selected by the query's source filter.
                News news = JSON.parseObject(hit.getSourceAsString(), News.class);
                if (news == null) {
                    // No source on this hit; still return the highlights.
                    news = new News();
                }

                Map<String, HighlightField> highlightFieldMap = hit.getHighlightFields();
                HighlightField titleField = highlightFieldMap.get("title");
                HighlightField contentField = highlightFieldMap.get("content");

                if (titleField != null) {
                    news.setTitle(joinFragments(titleField));
                }
                if (contentField != null) {
                    news.setContent(joinFragments(contentField));
                }
                newsList.add(news);
            }
            return newsList;
        }

        /**
         * Concatenates the fragments of one highlighted field. Highlighting cuts the
         * text around the matches into pieces, hence the array.
         */
        private static String joinFragments(HighlightField field) {
            StringBuilder joined = new StringBuilder();
            Text[] fragments = field.getFragments();
            if (fragments != null) {
                for (Text fragment : fragments) {
                    joined.append(fragment.toString());
                }
            }
            return joined.toString();
        }
    }
}
10.前端的实现
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<link rel="stylesheet" href="css/jquery-ui.min.css">
<script src="js/jquery-3.5.0.js"></script>
<script src="js/jquery-ui.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/vue.js"></script>
<script src="js/axios.min.js"></script>
<style>
.desc-text {
height: 50px;
overflow: hidden;
}
a,a:link, a:visited, a:hover, a:active {
text-decoration: none;
}
</style>
</head>
<body>
<!-- Page layout: search bar on top, Vue-rendered result list below. -->
<div class="container-fluid">
<div class="row mt-3 pb-3 mb-3" style="border-bottom: 1px solid #e2e3e5;">
<div class="col-10">
<!-- onsubmit returns false, so the form itself is never submitted. -->
<form class="form-inline" onsubmit="javascript: return false;">
<div class="form-group col-6">
<!-- Every keyup calls searchNews(); the page script also attaches the autocomplete widget to this input via its id. -->
<input class="form-control col" id="search-text" onkeyup="searchNews()">
</div>
<button type="submit" class="btn btn-primary col-1">搜索一下</button>
</form>
</div>
</div>
<!-- Mount point of the Vue instance (el: '#app'); one row is rendered per entry of `news`. -->
<div id="app">
<div v-for="n in news" class="row mb-3" :key="n.id">
<div class="col-10">
<!-- v-html inserts title and content as raw HTML, so the highlight tags sent by the server are rendered. -->
<h4><a target="_blank" :href="n.url"><span v-html="n.title"></span></a></h4>
<p v-html="n.content">
</p>
</div>
</div>
</div>
</div>
</body>
<script>
// Root Vue instance mounted on #app; `news` is the list of search results rendered by the template.
var vm = new Vue({
    el: '#app',
    data: function () {
        var initialState = { news: [] };
        return initialState;
    }
});
// Attach the jQuery UI autocomplete widget to the search box; suggestions come from GET tip?text=...
// The number of suggestions is bounded by the server (the widget has no `max` option).
$('#search-text').autocomplete({
    delay: 300,   // wait this many ms after a keystroke before asking the server
    minLength: 1, // minimum number of typed characters before a request is sent
    source: function (request, cb) {
        $.ajax({
            url: 'tip',
            data: {text: request.term},
            type: 'get',
            dataType: 'json',
            success: function (_data) {
                // The endpoint answers with a JSON array of strings; pass it straight through.
                cb(Array.isArray(_data) ? _data : []);
            },
            error: function () {
                // The response callback must always be invoked, even on failure,
                // otherwise the widget is left waiting for this request.
                cb([]);
            }
        });
    }
});
// Sequence number of the most recent search request; used to discard stale responses.
var latestSearchId = 0;

/**
 * Reads the search box and, if it is not blank, loads the matching news into vm.news.
 * Called on every keyup, so several requests can be in flight at once: only the
 * response of the latest request is applied, and it replaces the list in one step.
 */
function searchNews() {
    let searchText = $('#search-text').val(); // current content of the search box
    if (searchText && searchText.trim()) {
        let searchId = ++latestSearchId;
        vm.news = [];
        // Encode the text so characters such as '&', '#' or '+' cannot break the query string.
        axios.get('news/search?searchText=' + encodeURIComponent(searchText))
            .then(res => {
                if (searchId !== latestSearchId) {
                    return; // a newer search was started meanwhile
                }
                vm.news = Array.isArray(res.data) ? res.data : [];
            });
    }
}
</script>
</html>
11.页面效果
length: 1 // 最低输入多少个字母就往服务器端发送请求
})
function searchNews() {
let searchText = $('#search-text').val(); //拿到搜索内容
if(searchText && searchText.trim()) {
vm.news = [];
axios.get('news/search?searchText=' + searchText)
.then(res => {
for(let i = 0; i < res.data.length; i++) {
vm.news.push(res.data[i])
}
})
}
}