ElasticSearch+Jd项目学习笔记(狂神)

创建一个SpringBoot项目

<dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.73</version>
        </dependency>

        <!-- jsoup解析页面 -->
        <!-- 解析网页;如需爬取视频、音乐等内容,可以研究 Apache Tika -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>

        <!-- thymeleaf -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

配置文件

server:
  port: 9090

#关闭thymeleaf的缓存
spring:
  thymeleaf:
    cache: false



在这里插入图片描述

爬取数据

下面这个依赖(jsoup)可以解析网页;如果想要解析电影、音乐等内容,可以学习 Apache Tika。

	    <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>

工具类:
在这里插入图片描述
查看id
在这里插入图片描述

在这里插入图片描述
完善工具类

package com.dongmu.util;

import com.dongmu.pojo.JdGoodsContent;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;


/**
 * Scrapes product entries from the JD.com search result page using jsoup.
 *
 * <p>Declared as a Spring {@code @Component}; it holds no state.
 */
@Component
public class HtmlParseUtil {

    /** Timeout, in milliseconds, passed to jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and builds one
     * {@link JdGoodsContent} per {@code <li>} found under the element with id
     * {@code J_goodsList}.
     *
     * @param keyword the search term; it is percent-encoded (UTF-8) before being
     *                placed in the query string
     * @return the parsed entries; an empty list when {@code keyword} is null or
     *         blank, or when the page contains no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<JdGoodsContent> parseJd(String keyword) throws IOException {
        List<JdGoodsContent> list = new ArrayList<>();
        if (keyword == null || keyword.trim().isEmpty()) {
            // Nothing to search for; avoid a pointless network round trip.
            return list;
        }

        // Encode the keyword so that non-ASCII text, spaces and reserved characters
        // such as '&' or '#' cannot corrupt the query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8");

        // Requires network access; the returned Document represents the whole page.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        Element goodsContainer = document.getElementById("J_goodsList");
        if (goodsContainer == null) {
            // The expected container is missing (presumably a redirect or a changed
            // page layout - TODO confirm); report "no results" instead of an NPE.
            return list;
        }

        Elements items = goodsContainer.getElementsByTag("li");
        for (Element item : items) {
            // Images on the page are lazy-loaded, so "src" is not populated in the
            // raw HTML; the real address is kept in the "data-lazy-img" attribute.
            String imgUrl = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String goodPrice = item.getElementsByClass("p-price").eq(0).text();
            String goodName = item.getElementsByClass("p-name").eq(0).text();
            list.add(new JdGoodsContent(goodName, imgUrl, goodPrice));
        }

        return list;
    }
}

编写业务

service类

package com.dongmu.service;


import com.alibaba.fastjson.JSON;
import com.dongmu.pojo.JdGoodsContent;
import com.dongmu.util.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.List;

/**
 * Service that scrapes JD product data and stores it in Elasticsearch.
 */
@Service
public class JdGoodsService {

    /** Name of the Elasticsearch index the scraped products are written to. */
    private static final String INDEX_NAME = "jd_goods";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products matching {@code keyword} and bulk-indexes them into
     * the {@code jd_goods} index, one JSON document per product.
     *
     * @param keyword the search term handed to {@link HtmlParseUtil#parseJd(String)}
     * @return {@code true} if the bulk request completed without any item failure;
     *         {@code false} if an item failed or if nothing was scraped
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public boolean parseGoods(String keyword) throws IOException {
        List<JdGoodsContent> goods = new HtmlParseUtil().parseJd(keyword);

        // A bulk request without any action is rejected by the client during
        // request validation, so do not send one when there is nothing to index.
        if (goods == null || goods.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");

        for (JdGoodsContent item : goods) {
            // No explicit id is set, so Elasticsearch generates one per document.
            bulkRequest.add(new IndexRequest(INDEX_NAME)
                    .source(JSON.toJSONString(item), XContentType.JSON));
        }

        BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

}

把业务服务改成高亮的版本

package com.dongmu.service;


import com.alibaba.fastjson.JSON;
import com.dongmu.pojo.JdGoodsContent;
import com.dongmu.util.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Service that scrapes JD product data into Elasticsearch and searches it
 * with highlighted titles.
 */
@Service
public class JdGoodsService {

    /** Name of the Elasticsearch index used for both indexing and searching. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is queried and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products matching {@code keyword} and bulk-indexes them into
     * the {@code jd_goods} index, one JSON document per product.
     *
     * @param keyword the search term handed to {@link HtmlParseUtil#parseJd(String)}
     * @return {@code true} if the bulk request completed without any item failure;
     *         {@code false} if an item failed or if nothing was scraped
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public boolean parseGoods(String keyword) throws IOException {
        List<JdGoodsContent> goods = new HtmlParseUtil().parseJd(keyword);

        // A bulk request without any action is rejected by the client during
        // request validation, so do not send one when there is nothing to index.
        if (goods == null || goods.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");

        for (JdGoodsContent item : goods) {
            bulkRequest.add(new IndexRequest(INDEX_NAME)
                    .source(JSON.toJSONString(item), XContentType.JSON));
        }

        BulkResponse response = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

    /**
     * Runs a term query for {@code keyword} on the {@code title} field and returns
     * one page of hits, with the title replaced by its highlighted form when
     * Elasticsearch returned a highlight for it.
     *
     * @param keyword  the term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // "from" is the offset of the first hit, not a page number, so the
        // 1-based page has to be converted: page 1 -> 0, page 2 -> pageSize, ...
        searchSourceBuilder.from((page - 1) * pageSize);
        searchSourceBuilder.size(pageSize);

        // Exact term match on the title field.
        searchSourceBuilder.query(QueryBuilders.termQuery(TITLE_FIELD, keyword));

        // Wrap every matched term of the title in a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        // Upper bound for the time the search may take.
        searchSourceBuilder.timeout(new TimeValue(60,TimeUnit.SECONDS));

        searchRequest.source(searchSourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        SearchHits hits = response.getHits();
        List<Map<String,Object>> results = new ArrayList<>();
        for (SearchHit hit : hits) {
            // Fetch the source map once, patch it, then add that same instance,
            // so the result never depends on the client caching the map.
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();

            HighlightField title = hit.getHighlightFields().get(TITLE_FIELD);
            if (title != null && sourceAsMap != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    // Append each fragment (not the fragments array).
                    highlighted.append(fragment);
                }
                // Replace the plain title with the highlighted one.
                sourceAsMap.put(TITLE_FIELD, highlighted.toString());
            }

            results.add(sourceAsMap);
        }

        return results;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

北海冥鱼未眠

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值