学习目标:Elasticsearch 项目实战
数据来源:通过爬虫抓取京东(JD)搜索页面的商品数据,并同步到 Elasticsearch
1. elasticsearch 配置类
/**
 * Bean definitions for the Elasticsearch high-level REST client.
 *
 * NOTE(review): no {@code @Configuration}/{@code @Component} annotation is visible on this class
 * in this excerpt -- confirm the class is registered with Spring so the {@code @Bean} method
 * below is actually processed.
 */
public class ElasticSearchClientConfig {

    /**
     * Builds a client that talks to a single node at {@code http://127.0.0.1:9200}.
     *
     * @return the client instance exposed as a bean
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
1. 实体类
/**
 * One product entry: image, price and title, all kept as plain strings.
 *
 * <p>Lombok supplies the accessors, the no-args and all-args constructors and the builder.
 * The field order below determines the parameter order of the all-args constructor.
 */
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Data
public class Content {

    /** Image address of the product. */
    private String img;

    /** Price text of the product. */
    private String price;

    /** Title text of the product. */
    private String title;
}
2. 爬虫数据代码
/**
 * Scrapes product entries from the JD search result page.
 */
public class HtmlParseUtil {

    /**
     * Downloads the JD search page for {@code keyWord} and builds one {@link Content} for every
     * {@code <li>} found under the element with id {@code J_goodsList}.
     *
     * @param keyWord search keyword; it is URL-encoded as UTF-8 before being put in the query
     *                string ({@code null} is sent as the literal text {@code "null"})
     * @return the scraped entries; an empty list when the page has no {@code J_goodsList} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parse(String keyWord) throws Exception {
        // Encode the keyword so that non-ASCII text, spaces or '&' cannot corrupt the query string.
        String encodedKeyword = java.net.URLEncoder.encode(String.valueOf(keyWord), "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword;

        // Fetch and parse the page; the second argument is the timeout in milliseconds.
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> contentList = new ArrayList<>();

        // getElementById returns null when the id is absent; report "nothing found"
        // instead of failing with a NullPointerException.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return contentList;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            contentList.add(Content.builder()
                    .img(img)
                    .price(price)
                    .title(title)
                    .build());
        }
        return contentList;
    }
}
3.业务层
// Elasticsearch client used by the methods below; populated by Spring through field injection.
@Autowired
private RestHighLevelClient restHighLevelClient;
/**
 * Scrapes the entries for {@code keyWords} and bulk-indexes them into the {@code goods} index.
 *
 * @param keyWords keyword handed to {@link HtmlParseUtil#parse(String)}
 * @return {@code true} when at least one entry was scraped and the bulk response reports no
 *         failures; {@code false} when nothing was scraped or any bulk item failed
 * @throws Exception if scraping or the bulk call fails
 */
public Boolean parse(String keyWords) throws Exception {
    List<Content> contents = new HtmlParseUtil().parse(keyWords);

    // A bulk request without any action fails client-side validation, so do not send one
    // when the scrape came back empty.
    if (contents == null || contents.isEmpty()) {
        return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    // NOTE(review): ids restart at "1" on every call, so a later call overwrites the documents
    // written by an earlier one -- confirm this is intended.
    int docId = 1;
    for (Content content : contents) {
        bulkRequest.add(
                new IndexRequest("goods")
                        .id("" + docId)
                        .source(JSON.toJSONString(content), XContentType.JSON)
        );
        docId++;
    }

    BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulk.hasFailures();
}
}
5. Controller层
/**
 * HTTP entry point that delegates to {@code ContentService}.
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Handles {@code /parse} by passing the keyword on to the service.
     *
     * @param keyWords keyword forwarded unchanged to {@code contentService.parse}
     * @return whatever the service returns
     * @throws Exception propagated from the service
     */
    @RequestMapping("/parse")
    public Boolean parse(String keyWords) throws Exception {
        Boolean outcome = contentService.parse(keyWords);
        return outcome;
    }
}
6.测试结果
7. 从 Elasticsearch 查询数据
/**
 * Runs a paged term query on the {@code title} field of the {@code goods} index.
 *
 * @param keyWords term to look up in {@code title}
 * @param pageNo   page number; values of 1 or less select the first page
 * @param pageSize number of hits per page
 * @return the {@code _source} of every hit on the requested page, as maps
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> searchGoods(String keyWords,int pageNo,int pageSize)
        throws IOException {
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // from() takes the offset of the first hit, not a page index, so convert the page number.
    // Passing pageNo directly would make pages overlap and page 1 skip the first hit.
    int firstHit = pageNo <= 1 ? 0 : (pageNo - 1) * pageSize;
    sourceBuilder.from(firstHit);
    sourceBuilder.size(pageSize);

    // NOTE(review): a term query does not analyze its input; if `title` is mapped as an
    // analyzed text field, multi-character keywords may not match -- confirm against the mapping.
    TermQueryBuilder queryBuilder = QueryBuilders.termQuery("title", keyWords);
    sourceBuilder.query(queryBuilder);

    SearchRequest searchRequest = new SearchRequest("goods");
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Collect the _source of each hit.
    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit searchHit : searchResponse.getHits().getHits()) {
        list.add(searchHit.getSourceAsMap());
    }
    return list;
}
8.访问测试
/**
 * Handles {@code /searchGoods} by forwarding the three request parameters to the service.
 *
 * @param keyWords value of the {@code keyWords} request parameter
 * @param pageNo   value of the {@code pageNo} request parameter
 * @param pageSize value of the {@code pageSize} request parameter
 * @return the list returned by {@code contentService.searchGoods}
 * @throws IOException propagated from the service
 */
@RequestMapping("/searchGoods")
public List<Map<String,Object>> searchGoods(
        @RequestParam("keyWords") String keyWords,
        @RequestParam("pageNo") int pageNo,
        @RequestParam("pageSize") int pageSize) throws IOException {
    List<Map<String,Object>> hits = contentService.searchGoods(keyWords, pageNo, pageSize);
    return hits;
}
9.实现高亮效果
/**
 * Same paged term query as a plain search on {@code goods}, but the {@code title} of each hit
 * is replaced by its highlighted form (matches wrapped in a red {@code <span>}).
 *
 * @param keyWords term to look up in {@code title}
 * @param pageNo   page number; values of 1 or less select the first page
 * @param pageSize number of hits per page
 * @return the {@code _source} of every hit on the requested page, with {@code title} swapped
 *         for the highlighted text whenever a highlight was returned for it
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> searchHigth(String keyWords,int pageNo,int pageSize) throws IOException {
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // from() takes the offset of the first hit, not a page index, so convert the page number.
    int firstHit = pageNo <= 1 ? 0 : (pageNo - 1) * pageSize;
    sourceBuilder.from(firstHit);
    sourceBuilder.size(pageSize);

    // Highlight settings for the title field.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    // false: do not restrict highlighting to fields the query itself matched on.
    highlightBuilder.requireFieldMatch(false);
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");
    // The builder has no effect until it is attached to the search source; without this call
    // the response never contains highlight fields.
    sourceBuilder.highlighter(highlightBuilder);

    // NOTE(review): a term query does not analyze its input; if `title` is mapped as an
    // analyzed text field, multi-character keywords may not match -- confirm against the mapping.
    TermQueryBuilder queryBuilder = QueryBuilders.termQuery("title", keyWords);
    sourceBuilder.query(queryBuilder);

    SearchRequest searchRequest = new SearchRequest("goods");
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit searchHit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
        HighlightField title = searchHit.getHighlightFields().get("title");
        // Replace the stored title with the concatenated highlighted fragments, if any.
        if (title != null) {
            StringBuilder highlighted = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlighted.append(fragment.toString());
            }
            sourceAsMap.put("title", highlighted.toString());
        }
        list.add(sourceAsMap);
    }
    return list;
}