引入相关依赖
<properties>
<!-- Java language level for the build; presumably consumed by the parent POM, which is not shown here (confirm). -->
<java.version>1.8</java.version>
<!-- NOTE(review): presumably overrides the Elasticsearch client version managed by the parent POM so that it matches the server in use; confirm against the cluster version. -->
<elasticsearch.version>7.6.1</elasticsearch.version>
</properties>
<dependencies>
    <!-- HTML fetching and parsing -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    <!-- JSON serialization of scraped items before indexing -->
    <!-- NOTE(review): check this fastjson release against current security advisories before production use. -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.62</version>
    </dependency>
    <!-- JUnit is only needed by tests, so keep it off the runtime classpath. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- No explicit version: like the other starters, this one takes the managed version,
         which keeps all Spring Boot modules on the same release. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
        <exclusions>
            <exclusion>
                <groupId>org.junit.vintage</groupId>
                <artifactId>junit-vintage-engine</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
解析页面所用的实体类
package com.example.bean;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One product entry taken from a search-result page.
 *
 * <p>Lombok supplies the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor, and an all-arguments constructor whose parameters follow
 * the field declaration order: title, price, img.
 *
 * @author kuai
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title text. */
    private String title;

    /** Product price text. */
    private String price;

    /** Address of the product image. */
    private String img;
}
解析页面工具类
package com.example.utils;
import com.example.bean.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product listings from the JD search-result page.
 *
 * @author kuai
 */
@Component
public class HtmlPageUtil {

    /** Search page address; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Upper bound for fetching the page, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the search-result page for a keyword and extracts one {@link Content}
     * per {@code li} element found inside the {@code J_goodsList} container.
     *
     * @param keywords search keyword; it is URL-encoded as UTF-8 before being sent
     * @return the extracted entries; empty when the keyword is blank or the page has no
     *         {@code J_goodsList} container, never {@code null}
     * @throws IOException if the page cannot be fetched
     */
    public List<Object> parseJD(String keywords) throws IOException {
        List<Object> list = new ArrayList<>();
        if (keywords == null || keywords.trim().isEmpty()) {
            return list;
        }

        // Encode the keyword so spaces, '&', '#' and non-ASCII text cannot corrupt the query string.
        String url = SEARCH_URL + java.net.URLEncoder.encode(keywords, "UTF-8");
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        // The container is absent when the site answers with a different page,
        // so treat that as "no results" instead of failing with a NullPointerException.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return list;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): the image address is read from "src"; if the page lazy-loads
            // images under a different attribute this will be empty - confirm against the live markup.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            list.add(new Content(title, price, img));
        }
        return list;
    }
}
ES配置
package com.example.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Registers the Elasticsearch high-level REST client as a Spring bean.
 *
 * @author kuai
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds a client that talks to a single node at {@code http://localhost:9200}.
     *
     * @return the client bean
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
提供服务类
package com.example.service;
import com.alibaba.fastjson.JSON;
import com.example.utils.HtmlPageUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Indexes scraped product data into Elasticsearch and runs highlighted keyword
 * searches over it.
 *
 * @author kuai
 */
@Service
public class ContentService {

    /** Index that scraped products are written to and searched from. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is both queried and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for a keyword and bulk-indexes them.
     *
     * @param keywords keyword passed to the page scraper
     * @return {@code true} when every scraped item was indexed without failure;
     *         {@code false} when the bulk response reports failures or nothing was scraped
     * @throws IOException if scraping or the bulk call fails
     */
    public Boolean paresContent(String keywords) throws IOException {
        List<Object> list = new HtmlPageUtil().parseJD(keywords);
        if (list == null || list.isEmpty()) {
            // A bulk request without any action is rejected by client-side validation,
            // so report "nothing indexed" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Object item : list) {
            bulkRequest.add(new IndexRequest(INDEX_NAME).source(JSON.toJSONString(item), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches the index for documents whose title matches the keyword and returns one
     * page of hits, with the title replaced by its highlighted form when available.
     *
     * @param keyword term looked up in the title field
     * @param pageNo  1-based page number; values below 1 are treated as 1
     * @param size    number of hits per page
     * @return the source map of each hit on the requested page
     * @throws IOException if the search call fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int size) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // "from" is the offset of the first hit, not a page number.
        sourceBuilder.from((pageNo - 1) * size);
        sourceBuilder.size(size);

        // NOTE(review): a term query is not analyzed, so multi-word or mixed-case keywords
        // may not match an analyzed text field - confirm against the index mapping.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TITLE_FIELD, keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Wrap matched terms in the title with a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color: red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : search.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get(TITLE_FIELD);
            if (title != null) {
                // Join the text of the highlighted fragments; appending the HighlightField
                // itself would insert its debug string instead of the title.
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment.string());
                }
                sourceAsMap.put(TITLE_FIELD, highlighted.toString());
            }
            list.add(sourceAsMap);
        }
        return list;
    }
}
控制类
package com.example.controller;
import com.example.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints for scraping products into the index and for searching them.
 *
 * @author kuai
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Probe endpoint.
     *
     * @return the literal {@code "test"}
     */
    @GetMapping("/test")
    public String test() {
        return "test";
    }

    /**
     * Scrapes and indexes the products for a keyword.
     *
     * @param keyword keyword taken from the request path
     * @return the result reported by {@link ContentService#paresContent(String)}
     * @throws IOException if scraping or indexing fails
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.paresContent(keyword);
    }

    /**
     * Scrapes and indexes the products for a keyword, then returns one page of search hits.
     * The scrape-and-index step runs on every call and its result is ignored.
     *
     * @param keyword  keyword taken from the request path
     * @param pageNo   page number taken from the request path
     * @param pageSize page size taken from the request path
     * @param model    not used by this handler
     * @return the hits returned by {@link ContentService#searchPage(String, int, int)}
     * @throws IOException if scraping, indexing or searching fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(@PathVariable("keyword") String keyword,
                                            @PathVariable("pageNo") int pageNo,
                                            @PathVariable("pageSize") int pageSize,
                                            Model model) throws IOException {
        contentService.paresContent(keyword);
        return contentService.searchPage(keyword, pageNo, pageSize);
    }
}
解决跨域请求问题(CORS 配置)
package com.example.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.CorsRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
/**
 * Global CORS settings applied to every request path.
 *
 * @author kuai
 */
@Configuration
public class CorsConfig implements WebMvcConfigurer {

    /**
     * Allows cross-origin calls to all paths from any origin, with any header, for the
     * listed HTTP methods, with credentials enabled and a 3600-second preflight cache.
     *
     * <p>NOTE(review): a wildcard origin combined with {@code allowCredentials(true)} is very
     * permissive, and newer Spring versions may reject the combination - confirm the
     * intended origins before deploying.
     *
     * @param registry registry that receives the mapping
     */
    @Override
    public void addCorsMappings(CorsRegistry registry) {
        registry.addMapping("/**")
                .allowedOrigins("*")
                .allowedHeaders("*")
                .allowedMethods("GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS")
                .maxAge(3600)
                .allowCredentials(true);
    }
}
前端页面
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <!-- <script type="text/javascript" src="js/jquery.min.js"></script>-->
    <script src="https://cdn.staticfile.org/jquery/1.10.2/jquery.min.js"></script>
    <script type="text/javascript">
        // Prefix of titles that arrive wrapped as "[title], fragments[[...]]".
        var FRAGMENT_WRAPPER = '[title], fragments[[';

        // Returns the displayable title: unwraps the wrapped form, leaves plain titles untouched.
        // Stripping a fixed 20 characters unconditionally would cut off the start of plain titles.
        function displayTitle(raw) {
            var text = String(raw || '');
            if (text.indexOf(FRAGMENT_WRAPPER) === 0) {
                return text.substring(FRAGMENT_WRAPPER.length).split(']')[0];
            }
            return text;
        }

        // Queries the search endpoint for the typed keyword and renders the first page of hits.
        // Always returns false so the surrounding form is not submitted (which would reload the page).
        function fun() {
            var keyword = document.getElementById("keywords").value;
            if (!keyword) {
                return false;
            }
            $.ajax({
                type: "get",
                // Encode the keyword so spaces, '/', '#', '?' and non-ASCII text stay inside one path segment.
                url: 'http://localhost:8080/search/' + encodeURIComponent(keyword) + '/1/10',
                dataType: "json", // parse the response as JSON
                success: function (data) { // runs when the request succeeds
                    var html = '';
                    for (var i = 0; i < data.length; i++) {
                        var title = displayTitle(data[i].title);
                        var img = data[i].img;
                        var price = data[i].price;
                        // The title is inserted as HTML on purpose: it carries the highlight <span>.
                        html += '<li><img src="' + img + '"></li><li>' + title + '</li><li>' + price + '</li><hr>';
                    }
                    // Replace rather than append, so an earlier search's results do not pile up.
                    document.getElementById("wrap").innerHTML = html;
                }
            });
            return false;
        }
    </script>
</head>
<body>
<form action="#" onsubmit="return fun()">
    <input type="text" id="keywords">
    <input type="submit" value="提交">
</form>
<div style="margin-left: 10%">
    <ul id="wrap" style="list-style: none">
    </ul>
</div>
</body>
</html>
启动项目效果图