注意:springboot集成elasticsearch有版本要求
参考文档:https://blog.csdn.net/haohaifeng002/article/details/102887921
1、pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the "pachong" (crawler) demo: a Spring Boot web application that scrapes
  product listings with jsoup and indexes/searches them in Elasticsearch.
  Versions of the Spring Boot starters, the MySQL driver and Lombok are inherited from
  spring-boot-starter-parent; only third-party artifacts outside that BOM pin a version here.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.4.1</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.freesun</groupId>
    <artifactId>pachong</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>pachong</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>2.1.4</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- jsoup: fetches and parses the scraped web pages -->
        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <!-- Upgraded from 1.10.2: older releases have published vulnerabilities when parsing untrusted HTML. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.15.3</version>
        </dependency>
        <!-- Elasticsearch (Spring Data starter; also brings in the high-level REST client) -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <!-- fastjson: serialises the scraped entities to JSON before indexing -->
        <!-- Upgraded from 1.2.62: 1.2.x releases before 1.2.83 have published deserialization vulnerabilities. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
2、配置文件application.properties
# HTTP port the embedded web server listens on.
server.port=8089
# Elasticsearch REST client connection settings (endpoint and basic-auth credentials).
# NOTE(review): the original note here said "Spring Boot 2.0 integrates Elasticsearch 7", but the pom
# uses Spring Boot 2.4.1; its managed client presumably targets Elasticsearch 7.x - confirm that the
# server version matches the client version pulled in by the Spring Boot parent.
spring.elasticsearch.rest.uris=http://localhost:9200
spring.elasticsearch.rest.username=elastic
spring.elasticsearch.rest.password=123456
3、elasticsearch配置类
package com.freesun.pachong.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class ElasticSearchClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient(){
return new RestHighLevelClient(
RestClient.builder(
new HttpHost("127.0.0.1",9200,"http") //也可写多个,集群
)
);
}
}
4、实体类Content.java
package com.freesun.pachong.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One scraped product entry.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors. The field declaration order defines the parameter order of the all-args
 * constructor, so it must not be rearranged.
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
public class Content {

    /** Product title text. */
    private String title;

    /** Product image URL. */
    private String img;

    /** Product price text, kept as a string exactly as scraped. */
    private String price;
}
5、工具类
爬取京东数据HtmlParseUtil.java(这里数据源直接使用京东上的,数据源也可以是数据库)
package com.freesun.pachong.utils;
import com.freesun.pachong.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product listings from the JD.com search result page.
 */
@Component
public class HtmlParseUtil {

    /**
     * Manual smoke test: prints the products found for the keyword "java".
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        System.out.println(new HtmlParseUtil().parseJD("java"));
    }

    /**
     * Fetches the JD search page for {@code keyword} and extracts one {@link Content} per
     * {@code <li>} inside the {@code J_goodsList} element.
     *
     * @param keyword search keyword; must not be {@code null}
     * @return the scraped products in page order; empty when the page has no
     *         {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched within 30 seconds or the keyword
     *                     cannot be encoded
     */
    public List<Content> parseJD(String keyword) throws IOException {
        // Encode the keyword so spaces, '&', '#' and non-ASCII characters produce a valid
        // query string instead of a truncated or malformed URL.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8");
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> goodsList = new ArrayList<>();
        Element goodsContainer = document.getElementById("J_goodsList");
        if (goodsContainer == null) {
            // The expected container is missing from the fetched page: report "no results"
            // instead of failing with a NullPointerException.
            return goodsList;
        }

        Elements items = goodsContainer.getElementsByTag("li");
        for (Element item : items) {
            String title = item.getElementsByClass("p-name").text();
            // The image URL is read from the lazy-loading attribute, not from "src".
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            goodsList.add(content);
        }
        return goodsList;
    }
}
6、controller层
ContentController.java
package com.freesun.pachong.web;
import com.freesun.pachong.service.ContentService;
import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints for importing scraped products into Elasticsearch and searching them.
 * Every endpoint delegates directly to {@link ContentService}.
 */
@RestController
public class ContentController {

    @Resource
    private ContentService contentService;

    /**
     * Imports the products scraped for {@code keyword} into the {@code jd_goods} index.
     *
     * @param keyword keyword taken from the request path
     * @return the result reported by {@link ContentService#paresContent(String)}
     * @throws IOException if the service call fails with an I/O error
     */
    @RequestMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean imported = contentService.paresContent(keyword);
        return imported;
    }

    /**
     * Plain search.
     *
     * @param keyword  keyword taken from the request path
     * @param pageNo   page number taken from the request path
     * @param pageSize page size taken from the request path
     * @return the documents returned by {@link ContentService#search(String, int, int)}
     * @throws IOException if the service call fails with an I/O error
     */
    @RequestMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String, Object>> hits = contentService.search(keyword, pageNo, pageSize);
        return hits;
    }

    /**
     * Search with highlighting.
     *
     * @param keyword  keyword taken from the request path
     * @param pageNo   page number taken from the request path
     * @param pageSize page size taken from the request path
     * @return the documents returned by
     *         {@link ContentService#searchHightLight(String, int, int)}
     * @throws IOException if the service call fails with an I/O error
     */
    @RequestMapping("/searchHightLight/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> searchHightLight(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String, Object>> hits = contentService.searchHightLight(keyword, pageNo, pageSize);
        return hits;
    }
}
7、service层
ContentService.java
package com.freesun.pachong.service;
import com.alibaba.fastjson.JSON;
import com.freesun.pachong.pojo.Content;
import com.freesun.pachong.utils.HtmlParseUtil;
import org.apache.lucene.util.QueryBuilder;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Imports scraped products into the {@code jd_goods} Elasticsearch index and searches them
 * by title.
 */
@Service
public class ContentService {

    /** Index that stores the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    /** Indexed field that is queried and highlighted (lower-case field name, not camelCase). */
    private static final String TITLE_FIELD = "title";

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for {@code keyword} and bulk-indexes them into {@code jd_goods}.
     *
     * @param keyword keyword to scrape
     * @return {@code true} when at least one product was scraped and the bulk request reported
     *         no failures; {@code false} when nothing was scraped or any item failed
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean paresContent(String keyword) throws IOException {
        List<Content> contentList = new HtmlParseUtil().parseJD(keyword);
        if (contentList.isEmpty()) {
            // A bulk request without any action is rejected by client-side validation,
            // so report "nothing imported" instead of letting that exception escape.
            return false;
        }

        BulkRequest request = new BulkRequest();
        request.timeout("10s");
        // NOTE(review): ids restart at 1 on every call, so a later import replaces the
        // documents written by an earlier one - confirm this is intended.
        for (int i = 0; i < contentList.size(); i++) {
            request.add(new IndexRequest(INDEX_NAME)
                    .id(String.valueOf(i + 1))
                    .source(JSON.toJSONString(contentList.get(i)), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(request, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Plain search on the title field.
     *
     * @param keyword  term to look up in {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> search(String keyword, int pageNo, int pageSize) throws IOException {
        SearchResponse response = searchByTitle(keyword, pageNo, pageSize, null);
        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }

    /**
     * Search on the title field with the matched text wrapped in a green {@code <span>}.
     *
     * @param keyword  term to look up in {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, with {@code title} replaced
     *         by its highlighted form when a highlight is available
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> searchHightLight(String keyword, int pageNo, int pageSize) throws IOException {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.preTags("<span style='color:green'>");
        highlightBuilder.postTags("</span>");

        SearchResponse response = searchByTitle(keyword, pageNo, pageSize, highlightBuilder);
        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get(TITLE_FIELD);
            // Swap the stored title for the highlighted fragments when there are any.
            if (title != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment);
                }
                sourceAsMap.put(TITLE_FIELD, highlighted.toString());
            }
            list.add(sourceAsMap);
        }
        return list;
    }

    /**
     * Builds and executes the paged term query shared by both search methods.
     *
     * @param highlighter highlighting to apply, or {@code null} for none
     */
    private SearchResponse searchByTitle(String keyword, int pageNo, int pageSize,
                                         HighlightBuilder highlighter) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.size(pageSize);
        // "from" is the offset of the first hit, not a page number: page 1 starts at 0.
        // multiplyExact fails loudly instead of wrapping around to an unrelated offset.
        searchSourceBuilder.from(Math.multiplyExact(page - 1, pageSize));
        if (highlighter != null) {
            searchSourceBuilder.highlighter(highlighter);
        }
        // Exact term match.
        // NOTE(review): a term query is not analyzed, so whether it matches depends on how
        // "title" is mapped in the index - confirm against the index mapping.
        searchSourceBuilder.query(QueryBuilders.termQuery(TITLE_FIELD, keyword));
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(searchSourceBuilder);
        return restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    }
}
8、启动类
注意:若没有使用到数据库,需要加exclude= {DataSourceAutoConfiguration.class},去掉数据库配置
package com.freesun.pachong;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
/**
 * Application entry point.
 *
 * <p>{@link DataSourceAutoConfiguration} is excluded so the application can start without any
 * database configuration.
 */
@SpringBootApplication(exclude = DataSourceAutoConfiguration.class)
public class PachongApplication {

    /**
     * Boots the Spring application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(PachongApplication.class, args);
    }
}
9、测试
-
导入含keyword(这里导入含java关键字)的数据至es的索引库jd_goods,导入成功则返回true:http://localhost:8089/parse/java
jd_goods索引库
-
普通搜索:http://localhost:8089/search/java/1/20
-
高亮搜索:http://localhost:8089/searchHightLight/java/1/20