Elasticsearch 仿京东搜索

一:爬取京东数据

package com.esjd.Utils;

import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.MalformedURLException;
import java.net.URL;

/**
 * Demo scraper that downloads a JD search result page and prints the product
 * data found in it.
 */
public class HtmlParseUtil {
    /**
     * Fetches the JD search page for the keyword "java", prints the HTML of the
     * {@code J_goodsList} element, then prints the image URL, price and title
     * read from every {@code <li>} element of the page.
     *
     * <p>Requires network access. Checked exceptions raised while fetching the
     * page are rethrown via {@code @SneakyThrows}.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        // Search request; fetching it requires network access.
        String url = "https://search.jd.com/Search?keyword=java";

        // Download and parse the page (30 s timeout). The returned Document plays
        // the same role as the browser's DOM Document object.
        Document document = Jsoup.parse(new URL(url), 30000);

        // Look up the element with id "J_goodsList".
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // getElementById yields null when no element carries the id; bail out
            // with a message instead of failing with a NullPointerException below.
            System.err.println("Element J_goodsList not found in " + url);
            return;
        }
        System.out.println(goodsList.html());

        // Walk every <li> element of the page.
        Elements items = document.getElementsByTag("li");
        for (Element item : items) {
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            System.out.println("______________________________________--");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}
封装成工具类
/**
 * Scrapes the JD search result page for a keyword and collects the products
 * listed on it.
 *
 * <p>Requires network access. Only the HTML returned by the initial request is
 * parsed, so data the page would load later through AJAX is not available.
 * Checked exceptions are rethrown via {@code @SneakyThrows}.
 *
 * @param keyword search keyword; it is URL-encoded as UTF-8 before being placed
 *                in the query string
 * @return one {@link Content} per {@code <li>} element that carries the
 *         {@code gl-item} class, in document order; empty when there is none
 */
@SneakyThrows
public List<Content> paresJD(String keyword){
    // Encode the keyword so non-ASCII or reserved characters survive in the query string.
    String urlKeywords = URLEncoder.encode(keyword, "UTF-8");
    String url ="https://search.jd.com/Search?keyword=" + urlKeywords + "&enc=utf-8";

    // Download and parse the page (30 s timeout).
    Document document = Jsoup.parse(new URL(url),30000);

    List<Content> goodsList = new ArrayList<>();
    for (Element item : document.getElementsByTag("li")) {
        // hasClass checks the individual class names, so product items that carry
        // additional classes next to "gl-item" are kept as well.
        if (!item.hasClass("gl-item")) {
            continue;
        }
        Content content = new Content();
        content.setTitle(item.getElementsByClass("p-name").eq(0).text());
        content.setPrice(item.getElementsByClass("p-price").eq(0).text());
        // Image URL is read from the "data-lazy-img" attribute of the first <img>.
        content.setImg(item.getElementsByTag("img").eq(0).attr("data-lazy-img"));
        goodsList.add(content);
    }
    return goodsList;
}
编写pojo类
/**
 * One product entry taken from a search result page.
 *
 * <p>Accessors and constructors are not written by hand; they are expected to
 * come from the Lombok annotations on the class.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Content {
    // Add further properties here as the business requirements grow.

    /** Product title text. */
    private String title;

    /** Product image URL. */
    private String img;

    /** Product price text. */
    private String price;
}
解析数据到es中
  @Autowired
  // @Autowired cannot be used on its own: the injection only happens when the
  // Spring container manages this object.
  // Client through which this class sends its bulk and search requests.
  private RestHighLevelClient restHighLevelClient;


  //解析数据放入es中
  /**
   * Scrapes the products for a keyword and bulk-indexes them into the
   * {@code jd_goods} index.
   *
   * @param keywords search keyword passed to {@code HtmlParseUtil.paresJD}
   * @return {@code true} when the bulk response reports no failures;
   *         {@code false} when at least one item failed or when nothing was
   *         scraped (so nothing was indexed)
   * @throws IOException if the bulk request cannot be executed
   */
  public Boolean  parseContent(String keywords) throws IOException {
      List<Content> contents = new HtmlParseUtil().paresJD(keywords);
      if (contents == null || contents.isEmpty()) {
          // A bulk request without any action is rejected by the client's request
          // validation, so report "nothing stored" instead of sending it.
          return false;
      }

      // Collect one index action per scraped product.
      BulkRequest bulkRequest = new BulkRequest();
      bulkRequest.timeout("2m");
      for (Content content : contents) {
          // Serialize once and reuse the JSON for both the log line and the request.
          String json = JSON.toJSONString(content);
          System.out.println(json);
          bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
      }

      BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
      // Success means no item in the bulk response failed.
      return !bulk.hasFailures();
  }
}
对应的controller接口
@Autowired
private  ContentService contentService;


/**
 * Endpoint that scrapes the data for a keyword into Elasticsearch by delegating
 * to {@code contentService.parseContent}.
 *
 * @param keyword search keyword taken from the request path
 * @return the value returned by {@code contentService.parseContent}
 * @throws IOException propagated from the service call
 */
@GetMapping("/pares/{keyword}")
public Boolean pares(@PathVariable("keyword") String  keyword) throws IOException{
    Boolean stored = contentService.parseContent(keyword);
    return stored;
}
二:前后端分离进行搜索实现

搜索实现和搜索高亮实现

新建前端模板进行请求接口编写

new Vue({
    el: "#app",
    data: {
        // Current search keyword.
        keyword: '',
        // Hits returned by the last search request.
        results: []
    },
    methods: {
        /**
         * Requests the first page (10 hits) of highlighted search results for
         * the current keyword and stores the response data in `results`.
         * Does nothing when the keyword is empty.
         */
        searchKey() {
            var keyword = this.keyword;
            console.log(keyword);
            if (!keyword) {
                // An empty keyword would produce an empty path segment that the
                // "/HighlightBuilder/{keyword}/{pageNo}/{pageSize}" route cannot match.
                return;
            }
            // The keyword becomes a URL path segment, so characters such as "/",
            // "?", "#", "%" or spaces must be percent-encoded first.
            var encodedKeyword = encodeURIComponent(keyword);
            // Highlighted search; the plain paged search lives under "search/" with
            // the same "/<keyword>/<pageNo>/<pageSize>" layout.
            axios.get("/HighlightBuilder/" + encodedKeyword + "/1/10").then(response => {
                console.log(response);
                // Bind the returned hits to the view.
                this.results = response.data;
            })
        }
    }
})
编写service层
  //2. 获取这些数据实现搜索功能
    /**
     * Runs a paged term query against the {@code title} field of the
     * {@code jd_goods} index.
     *
     * @param keyword  term matched against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, in response order
     * @throws IOException if the search request cannot be executed
     */
    public List<Map<String ,Object>> searchPage(String keyword ,int  pageNo,int pageSize) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        // Build the search request.
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: from() expects the offset of the first hit, not the page number,
        // so page 1 starts at offset 0 and consecutive pages do not overlap.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect the source document of every hit.
        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }

    //2. 获取这些数据实现搜索高亮功能
    /**
     * Runs a paged term query against the {@code title} field of the
     * {@code jd_goods} index and returns the hits with the matched part of the
     * title wrapped in a red {@code <span>}.
     *
     * @param keyword  term matched against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, in response
     *         order; {@code title} is replaced by its highlighted form whenever
     *         the response carries a highlight for that field
     * @throws IOException if the search request cannot be executed
     */
    public List<Map<String ,Object>> searchHighlightBuilder(String keyword ,int  pageNo,int pageSize) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        // Build the search request.
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: from() expects the offset of the first hit, not the page number,
        // so page 1 starts at offset 0 and consecutive pages do not overlap.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlight the title field and wrap every match in a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect the hits, swapping in the highlighted title where available.
        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            // Original (un-highlighted) document.
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();

            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                // Concatenate the highlight fragments and use them as the title.
                StringBuilder highlightedTitle = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlightedTitle.append(fragment);
                }
                sourceAsMap.put("title", highlightedTitle.toString());
            }

            list.add(sourceAsMap);
        }
        return list;
    }
实现的接口controller
/**
 * Paged search endpoint; forwards the path variables to
 * {@code contentService.searchPage}.
 *
 * @param keyword  search keyword from the request path
 * @param pageNo   page number from the request path
 * @param pageSize page size from the request path
 * @return the list returned by {@code contentService.searchPage}
 * @throws IOException propagated from the service call
 */
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                       @PathVariable("pageNo") int pageNo,
                                       @PathVariable("pageSize")int pageSize) throws IOException {
    List<Map<String,Object>> hits = contentService.searchPage(keyword, pageNo, pageSize);
    return hits;
}

/**
 * Highlighted search endpoint; forwards the path variables to
 * {@code contentService.searchHighlightBuilder}.
 *
 * @param keyword  search keyword from the request path
 * @param pageNo   page number from the request path
 * @param pageSize page size from the request path
 * @return the list returned by {@code contentService.searchHighlightBuilder}
 * @throws IOException propagated from the service call
 */
@GetMapping("HighlightBuilder/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> searchHighlightBuilder(@PathVariable("keyword") String keyword,
                                       @PathVariable("pageNo") int pageNo,
                                       @PathVariable("pageSize")int pageSize) throws IOException {
    List<Map<String,Object>> highlightedHits =
            contentService.searchHighlightBuilder(keyword, pageNo, pageSize);
    return highlightedHits;
}

null){
Text[] fragments = title.fragments();
StringBuilder n_title = new StringBuilder();
for (Text text : fragments) {
n_title.append(text);
}
sourceAsMap.put(“title”, n_title.toString());

        }

// if (title!= null){
// Text[] fragments = title.fragments();
// String n_title = “”;
// for (Text text : fragments) {
// n_title+= text;
// }
// sourceAsMap.put(“title”,n_title);
//
//
// }

        //把所有结果遍历出来然后封装到list集合里面
        list.add(sourceAsMap);

    }


    return  list;
}

#### 实现的接口controller

//对数据进行分页
@GetMapping(“/search/{keyword}/{pageNo}/{pageSize}”)
public List<Map<String,Object>> search(@PathVariable(“keyword”) String keyword,
@PathVariable(“pageNo”) int pageNo,
@PathVariable(“pageSize”)int pageSize) throws IOException {

return  contentService.searchPage(keyword, pageNo, pageSize);

}

//高亮
@GetMapping(“HighlightBuilder/{keyword}/{pageNo}/{pageSize}”)
public List<Map<String,Object>> searchHighlightBuilder(@PathVariable(“keyword”) String keyword,
@PathVariable(“pageNo”) int pageNo,
@PathVariable(“pageSize”)int pageSize) throws IOException {

return  contentService.searchHighlightBuilder(keyword, pageNo, pageSize);

}

  • 5
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值