基于新闻的高亮搜索

10 篇文章 0 订阅

基于新闻的高亮搜索

1. 自定义analyzer
# 自定义分词器, news_analyzer是自定义的分词器的名字
PUT news
{
  "settings": {
    "analysis": {
      "analyzer": {
        "news_analyzer" : {
          "char_filter": ["html_strip"],
          "tokenizer": "keyword",
          "filter": [
             "my_news_filter"
          ]
        },
        "news_search_analyzer": {
          "char_filter": ["html_strip"],
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        }
      },
      "filter": {
        "my_news_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_none_chinese": false,
          "keep_separate_first_letter": false,
          "keep_joined_full_pinyin": true,
          "keep_none_chinese_in_joined_full_pinyin": true,
          "none_chinese_pinyin_tokenize": false,
          "limit_first_letter_length": 16,
          "keep_original": true
        } 
      }
    }
  }
}
2. 定义mappings
PUT news/_mapping
{
    "properties": {
      "id": {
        "type": "long"
      },
      "title": {
        "type": "text",
        "analyzer": "hanlp_index"
      },
      "url": {
        "type": "keyword"
      },
      "content": {
        "type": "text",
        "analyzer": "hanlp_index"
      },
      "tags": {
        "type": "completion",
        "analyzer": "news_analyzer",
        "search_analyzer": "news_search_analyzer"
      }
    }
}

设置mappings的时候,可以指定 "dynamic": false,意思是如果新增的文档中包含mappings里没有定义的字段,那么该字段的数据仍会保存在 _source 中,但不会被索引(既不会分词,也不能按该字段进行搜索),只是在文档被查询命中时会随 _source 一起返回。

3. 导入mysql的数据集

1.将news.sql导入mysql数据库

2.将mysql驱动包放在D:\elasticsearch\logstash-7.4.2\logstash-core\lib\jars目录下

3.将logstash-mysql-news.conf放在D:\elasticsearch\logstash-7.4.2\config目录下

4.进到logstash的bin目录下,执行:logstash.bat -f D:\elasticsearch\logstash-7.4.2\config\logstash-mysql-news.conf 命令,开始导入数据。

4.编写suggestion与query

搜索要使用的suggestion

GET news/_search
{
  "_source": ["id"], 
  "suggest": {
    "tags_suggest": {
      "prefix": "中",
      "completion": {
        "field": "tags",
        "skip_duplicates": true,
        "size": 10
      }
    }
  }
}

在使用suggestion的时候,“skip_duplicates”: true,表示的意思是如果出现相同的建议,那么只会保留一个。

搜索要使用的query

GET news/_search
{
  "_source": ["url"], 
  "query": {
    "multi_match": {
      "query": "中国赴塞尔维亚抗疫专家",
      "fields": ["title", "content"]
    }
  },
  "highlight": {
    "post_tags": "</span>",
    "pre_tags": "<span>",
    "fields": {
      "title": {},
      "content": {}
    }
  }
}
5.导入依赖
<!-- Dependencies of the demo application. No <version> elements are declared here;
     presumably they are managed by a Spring Boot parent POM / BOM (confirm in the full pom.xml). -->
<dependencies>
    <!-- Spring MVC starter: provides the @RestController / @GetMapping annotations used by the controllers. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lombok: supplies the @Data annotation used on the News POJO. -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>

    <!-- Spring Data Elasticsearch: provides ElasticsearchRestTemplate, @Document and the REST client configuration classes. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>

    <!-- Test support; restricted to the test classpath by the scope below. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
</dependencies>
6.编写ElasticsearchConfig(配置类 RestClientConfig)
package com.qf.config;

import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;

/**
 * Spring configuration that exposes the Elasticsearch REST client and a
 * template built on top of it.
 */
@Configuration
public class RestClientConfig extends AbstractElasticsearchConfiguration {

    /**
     * Creates the high-level REST client connected to {@code localhost:9200}.
     *
     * @return the client used for all Elasticsearch requests of this application
     */
    @Bean
    @Override
    public RestHighLevelClient elasticsearchClient() {
        return RestClients
                .create(ClientConfiguration.builder().connectedTo("localhost:9200").build())
                .rest();
    }

    /**
     * Exposes an {@link ElasticsearchRestTemplate} backed by {@link #elasticsearchClient()}.
     *
     * @return the template injected into the controllers
     */
    @Bean
    public ElasticsearchRestTemplate elasticsearchRestTemplate() {
        RestHighLevelClient client = elasticsearchClient();
        return new ElasticsearchRestTemplate(client);
    }
}
7.POJO类的编写
package com.qf.pojo;

import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;

/**
 * A news article as stored in the {@code news} index.
 * Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok.
 */
@Data
@Document(indexName = "news")
public class News {

    /** Identifier of the article. */
    private Integer id;

    /** Link to the article. */
    private String url;

    /** Headline; the search controller may overwrite it with a highlighted fragment. */
    private String title;

    /** Body text; the search controller may overwrite it with a highlighted fragment. */
    private String content;

    /** Tags of the article; the {@code tags} field is what the suggest endpoint queries. */
    private String tags;

}
8. 编写NewsSuggestController
package com.qf.controller;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

// 新闻提示搜索
@RestController
@RequestMapping("news-suggest")
public class NewsSuggestController {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;


    @GetMapping
    public Set<String> movieSuggest(String text) {
        /**
         * 第一步构建 CompletionSuggestionBuilder
         */
        CompletionSuggestionBuilder titlePrefixSuggest = new CompletionSuggestionBuilder("tags")
                .prefix(text)
                .size(10)   //显示个数
                .skipDuplicates(true);  //忽略重复

        /**
         * 第二步在去构建 SuggestBuilder, 封装所有的建议形式
         */
        SuggestBuilder suggestBuilder = new SuggestBuilder()
                .addSuggestion("suggest-news", titlePrefixSuggest);

        SearchResponse response=elasticsearchRestTemplate.suggest(suggestBuilder, IndexCoordinates.of("news"));

        //获取Suggest对象
        Suggest suggest = response.getSuggest();

        // 获取对应的搜索建议的结果
        Suggest.Suggestion suggestion = suggest.getSuggestion("suggest-news");

        Set<String> suggestionResult = new HashSet<>();

        List<Object> list = suggestion.getEntries();
        if(null != list && list.size() > 0){
            Object object = list.get(0);
            if(object instanceof CompletionSuggestion.Entry) {
                CompletionSuggestion.Entry resultEntry = (CompletionSuggestion.Entry)object;
                List<CompletionSuggestion.Entry.Option> options = resultEntry.getOptions();
                if(null != options && options.size() > 0) {
                    for(CompletionSuggestion.Entry.Option opt : options) {
                        Text txt = opt.getText();
                        suggestionResult.add(txt.toString());
                    }
                }
            }
        }

        return suggestionResult;
    }
}
9.编写NewsSearchController
package com.qf.controller;

import com.qf.pojo.News;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * REST endpoint that searches news by title and content and returns the hits
 * with the matching parts wrapped in highlight tags for the front end.
 */
@RestController
@RequestMapping("/news")
public class NewsSearchController {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Runs a {@code multi_match} query over {@code title} and {@code content} and
     * replaces each hit's title/content with its first highlighted fragment when one exists.
     *
     * <p>Query and highlight settings sent to Elasticsearch:
     * <pre>
     * GET news/_search
     * {
     *   "query": {
     *     "multi_match": {
     *       "query": "中国",
     *       "fields": ["title", "content"]
     *     }
     *   },
     *   "highlight": {
     *     "pre_tags": "&lt;font color='red'&gt;",
     *     "post_tags": "&lt;/font&gt;",
     *     "fields": {
     *       "title": {},
     *       "content": {}
     *     }
     *   }
     * }
     * </pre>
     *
     * @param searchText the text to search for; may be {@code null} or blank
     * @return the matching news; empty when {@code searchText} is {@code null}/blank
     *         or nothing matched
     */
    @GetMapping("/search")
    public List<News> searchNews(String searchText) {
        List<News> news = new ArrayList<>();

        // A missing request parameter arrives as null, which the query builder rejects;
        // answer with an empty result instead of failing the request.
        if (searchText == null || searchText.trim().isEmpty()) {
            return news;
        }

        MultiMatchQueryBuilder multiMatchQueryBuilder =
                new MultiMatchQueryBuilder(searchText, "title", "content");

        HighlightBuilder highlightBuilder = new HighlightBuilder()
                .preTags("<font color='red'>")
                .postTags("</font>")
                .field("title")
                .field("content");

        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(multiMatchQueryBuilder)
                .withHighlightBuilder(highlightBuilder)
                .build();

        SearchHits<News> search = elasticsearchRestTemplate.search(query, News.class);

        for (SearchHit<News> searchHit : search.getSearchHits()) {
            News item = searchHit.getContent();
            Map<String, List<String>> highlightFields = searchHit.getHighlightFields();

            // Only overwrite a field when a highlighted fragment is actually present;
            // otherwise the original value is kept.
            List<String> titleFragments = highlightFields.get("title");
            if (titleFragments != null && !titleFragments.isEmpty()) {
                item.setTitle(titleFragments.get(0));
            }

            List<String> contentFragments = highlightFields.get("content");
            if (contentFragments != null && !contentFragments.isEmpty()) {
                item.setContent(contentFragments.get(0));
            }

            news.add(item);
        }

        return news;
    }
}
10.前端的实现

把课件中的静态资源文件拷贝到static目录下即可

11.页面效果

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-gO1mVsFN-1663916202882)(images\success.png)]

searchHit.getContent().setContent(highLightFields.get("content") == null ? searchHit.getContent().getContent() : highLightFields.get("content").get(0));
        // 放到实体类中
        news.add(searchHit.getContent());
    }

    return news;
}

}


#### 10.前端的实现

>把课件中的静态资源文件拷贝到static目录下即可

#### 11.页面效果

[外链图片转存中...(img-gO1mVsFN-1663916202882)]

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

程序员子衿

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值