包含 Spring Boot 集成 Elasticsearch + 索引和文档的 API 操作详解
https://www.kuangstudy.com/bbs/1390860116089163777
京东搜索
爬虫
从京东爬取数据
新建一个工具类
导入依赖
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.70</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
爬取数据的工具类
/**
 * Scrapes product listings from the JD.com search results page.
 *
 * <p>Each matching list item is converted into a {@code Content} carrying the
 * product image URL, price text, and name text.
 */
@Component
public class jsoup {

    /** Timeout, in milliseconds, for fetching and parsing the search page. */
    private static final int FETCH_TIMEOUT_MILLIS = 30000;

    /**
     * Manual smoke test: scrapes the results for the keyword "html" and prints each item.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        new jsoup().parseJd("html").forEach(System.out::println);
    }

    /**
     * Fetches the JD.com search page for the given keyword and extracts the listed products.
     *
     * @param keywords the search keyword; a {@code null} value is treated as an empty keyword
     * @return the scraped products, possibly empty; never {@code null}
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJd(String keywords) throws Exception {
        // Encode the keyword so spaces, '&', '#', and non-ASCII text (e.g. Chinese)
        // produce a valid query string, e.g. https://search.jd.com/Search?keyword=java
        String encodedKeyword = java.net.URLEncoder.encode(keywords == null ? "" : keywords, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword;

        // Parse the page; the returned Document is navigated much like a browser DOM document.
        Document document = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);

        // Restrict the scan to the goods list container when it exists. getElementById
        // returns null when the id is absent, in which case fall back to the whole document.
        Element jGoodsList = document.getElementById("J_goodsList");
        Element searchRoot = jGoodsList != null ? jGoodsList : document;

        Elements items = searchRoot.getElementsByTag("li");
        List<Content> list = new ArrayList<>();
        for (Element el : items) {
            // Only list items whose class attribute is exactly "gl-item" are products.
            if (!"gl-item".equalsIgnoreCase(el.attr("class"))) {
                continue;
            }
            // The image URL is read from the "data-lazy-img" attribute of the first <img>.
            String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String name = el.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setPrice(price);
            content.setName(name);
            list.add(content);
        }
        return list;
    }
}
Service层
1、解析数据并放入 Elasticsearch
2、获取数据之后实现搜索功能
3、获取数据之后实现高亮搜索功能
/**
 * Indexes scraped product data into Elasticsearch and searches it.
 *
 * <p>All operations target the index named {@code "ljh"}.
 */
@Service
public class ContentService {

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes products for the keyword and bulk-indexes them.
     *
     * @param keyword the keyword passed to the scraper
     * @return {@code true} if every document was indexed without failure; {@code false} if the
     *         bulk response reports failures or nothing was scraped
     * @throws Exception if scraping or the bulk request fails
     */
    public Boolean insertElasticearch(String keyword) throws Exception {
        List<Content> contents = new jsoup().parseJd(keyword);
        // A bulk request with no actions fails client-side validation, so stop early.
        if (contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        // NOTE(review): "30000s" is 30000 seconds; this looks like it was meant to be
        // 30 seconds or 30000 ms -- confirm before changing.
        bulkRequest.timeout("30000s");
        for (Content content : contents) {
            System.out.println(content);
            bulkRequest.add(
                    new IndexRequest("ljh")
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Runs a paged term query on the {@code name} field.
     *
     * @param keyword  the term to match against {@code name}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of each hit on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        SearchRequest searchRequest = new SearchRequest("ljh");
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number.
        searchSourceBuilder.from(offsetOf(pageNo, pageSize));
        searchSourceBuilder.size(pageSize);

        // Exact term match.
        TermQueryBuilder termQuery = QueryBuilders.termQuery("name", keyword);
        searchSourceBuilder.query(termQuery);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Execute the search.
        searchRequest.source(searchSourceBuilder);
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect each hit's source.
        List<Map<String,Object>> objects = new ArrayList<>();
        for (SearchHit searchHit : search.getHits().getHits()) {
            objects.add(searchHit.getSourceAsMap());
        }
        return objects;
    }

    /**
     * Runs a paged terms query on the {@code name} field and replaces each hit's {@code name}
     * with its highlighted form when a highlight is returned.
     *
     * @param key      the term to match against {@code name}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of each hit on the requested page, with {@code name} highlighted
     *         where available
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchHighlightedPage(String key,int pageNo,int pageSize) throws IOException {
        SearchRequest searchRequest = new SearchRequest("ljh");
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number.
        searchSourceBuilder.from(offsetOf(pageNo, pageSize));
        searchSourceBuilder.size(pageSize);

        // Exact term match.
        TermsQueryBuilder termsQueryBuilder = new TermsQueryBuilder("name", key);
        searchSourceBuilder.query(termsQueryBuilder);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Build the highlighter for the name field.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("name");
        highlightBuilder.requireFieldMatch(true); // highlight only fields the query matched
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        // Execute the search.
        searchRequest.source(searchSourceBuilder);
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Swap the plain name for the highlighted one where a highlight exists.
        List<Map<String, Object>> objects = new ArrayList<>();
        for (SearchHit hit : search.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // the original document
            HighlightField name = hit.getHighlightFields().get("name");
            if (name != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text text : name.fragments()) {
                    highlighted.append(text);
                }
                sourceAsMap.put("name", highlighted.toString());
            }
            objects.add(sourceAsMap);
        }
        return objects;
    }

    /** Converts a 1-based page number into a document offset, treating pages below 1 as page 1. */
    private static int offsetOf(int pageNo, int pageSize) {
        return (Math.max(pageNo, 1) - 1) * pageSize;
    }
}
controller层
/**
 * HTTP endpoints for indexing scraped products and searching them.
 *
 * <p>Every handler delegates to {@code ContentService}.
 */
@RestController
public class indexController {

    @Autowired
    private ContentService contentService;

    /**
     * Handles GET requests on the controller's base mapping.
     *
     * @return the literal string {@code "index"}
     */
    @GetMapping
    public String index() {
        final String response = "index";
        return response;
    }

    /**
     * Scrapes products for the keyword and indexes them.
     *
     * @param keyword the keyword taken from the request path
     * @return the result reported by {@code ContentService.insertElasticearch}
     * @throws Exception if scraping or indexing fails
     */
    @GetMapping("/index/{keyword}")
    public Boolean search(@PathVariable("keyword") String keyword) throws Exception {
        final Boolean indexed = contentService.insertElasticearch(keyword);
        return indexed;
    }

    /**
     * Runs the highlighted search for the keyword.
     *
     * @param keyword  the search term taken from the request path
     * @param pageNo   the page number taken from the request path
     * @param pageSize the page size taken from the request path
     * @return the hits returned by {@code ContentService.searchHighlightedPage}
     * @throws IOException if the search request fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        final List<Map<String,Object>> hits =
                contentService.searchHighlightedPage(keyword, pageNo, pageSize);
        return hits;
    }
}
最终效果图