ES仿京东实战+jsoup

数据源:用爬虫获取。

1.导入依赖:Elasticsearch 客户端和 jsoup(用于解析网页)

<!-- Elasticsearch core library, pinned to 7.6.1 like every other Elasticsearch artifact below. -->
<dependency>
    <artifactId>elasticsearch</artifactId>
    <groupId>org.elasticsearch</groupId>
    <version>7.6.1</version>
</dependency>
<!-- Netty 4 transport plugin artifact. -->
<dependency>
    <groupId>org.elasticsearch.plugin</groupId>
    <artifactId>transport-netty4-client</artifactId>
    <version>7.6.1</version>
</dependency>
<!-- Transport client artifact. -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>transport</artifactId>
    <version>7.6.1</version>
</dependency>
<!-- High-level REST client; provides the RestHighLevelClient used by the service code. -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.6.1</version>
</dependency>
<!-- Low-level REST client artifact. -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-client</artifactId>
    <version>7.6.1</version>
</dependency>

<!-- jsoup: HTML parser used to scrape the product pages. -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.15.3</version>
</dependency>

2.京东页面现在有反爬虫机制,爬取时需要携带 cookie 才能绕过安全验证界面。

具体来说,需要找到 cookie 中 thor 键对应的值。

封装工具类

pojo层

service层

/**
 * Elasticsearch high-level REST client, injected by Spring from the bean
 * qualified as {@code restHighLevelClient}. The service methods below use it
 * for the bulk-index and search calls.
 */
@Autowired
@Qualifier("restHighLevelClient")
private RestHighLevelClient client;

/**
 * Crawls product data for {@code keyword} via {@code HtmlParseUtil#parseJd}
 * and bulk-indexes every result into the {@code jdgoods} index as a JSON document.
 *
 * @param keyword search term passed to the crawler
 * @return {@code true} when every document was indexed without failure;
 *         {@code false} when the crawler returned nothing or at least one
 *         bulk item failed
 * @throws Exception if the crawl or the bulk request itself fails
 */
public Boolean parseJD(String keyword) throws Exception{
    List<Content> contents = new HtmlParseUtil().parseJd(keyword);
    // A BulkRequest without any action is rejected by client-side validation
    // ("no requests added"), so an empty crawl result (for example when the
    // anti-crawler page is served) must short-circuit instead of throwing.
    if (contents == null || contents.isEmpty()) {
        return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("100s");
    for (Content content : contents) {
        bulkRequest.add(
                new IndexRequest("jdgoods")
                        .source(JSON.toJSONString(content), XContentType.JSON));
    }
    BulkResponse bulk = client.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulk.hasFailures();
}

/**
 * Runs a paginated match query on the {@code title} field of the
 * {@code jdgoods} index and returns the source of every hit, with
 * {@code title} replaced by its highlighted version when one is available.
 *
 * @param keyWord  text matched against the {@code title} field
 * @param pageno   1-based page number; values below 1 are treated as the first page
 * @param pageSize maximum number of hits per page
 * @return one map per hit, in the order Elasticsearch returned them
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> search(String keyWord,int pageno,int pageSize) throws IOException {
    // Elasticsearch's "from" is a document offset, not a page index, so the
    // page number has to be converted before it is handed to the builder.
    int offset = Math.max(pageno - 1, 0) * pageSize;

    // Wrap matched terms of the title in a red <span>.
    // The original called requireFieldMatch() without an argument, which is
    // only a getter and had no effect, so the call is dropped and the default
    // setting keeps applying.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(QueryBuilders.matchQuery("title", keyWord));
    searchSourceBuilder.from(offset);
    searchSourceBuilder.size(pageSize);
    searchSourceBuilder.highlighter(highlightBuilder);

    SearchRequest searchRequest = new SearchRequest("jdgoods");
    searchRequest.source(searchSourceBuilder);

    SearchResponse search = client.search(searchRequest, RequestOptions.DEFAULT);
    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit hit : search.getHits().getHits()) {
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        HighlightField title = hit.getHighlightFields().get("title");
        if (title != null) {
            // Replace the plain title with the concatenated highlighted fragments.
            StringBuilder highlighted = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlighted.append(fragment.string());
            }
            // Store a String rather than the builder so the map holds plain values.
            sourceAsMap.put("title", highlighted.toString());
        }
        list.add(sourceAsMap);
    }
    return list;
}

controller层

/**
 * REST endpoints that expose the crawl-and-index and search operations of
 * {@link ContentService}.
 */
@RestController
public class IndexController {

    /** Elasticsearch client bean; injected here but not referenced by the handlers below. */
    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;

    /** Service that performs the actual crawling, indexing and searching. */
    @Autowired
    private ContentService contentService;

    /**
     * Crawls products for the given keyword and indexes them.
     *
     * @param keyword search term taken from the URL path
     * @return the result reported by {@code ContentService#parseJD}
     * @throws Exception propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parase(@PathVariable("keyword") String keyword) throws Exception {
        return contentService.parseJD(keyword);
    }

    /**
     * Searches the indexed products.
     *
     * @param keyword  search term taken from the URL path
     * @param pageNo   page number taken from the URL path
     * @param pageSize page size taken from the URL path
     * @return the hits returned by {@code ContentService#search}
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String,Object>> hits = contentService.search(keyword, pageNo, pageSize);
        return hits;
    }

}

html

<!DOCTYPE html>
<!--
  Search page of the JD-clone demo.
  Thymeleaf resolves the static resource URLs; Vue binds the search box and
  renders the result list returned by the backend search endpoint.
-->
<html xmlns:th="http://www.thymeleaf.org">

<head>
    <meta charset="utf-8"/>
    <title>ES仿京东实战</title>
    <link rel="stylesheet" th:href="@{/css/style.css}"/>
    <script th:src="@{/js/jquery.min.js}"></script>
</head>

<body class="pg">
<div class="page" id="app">
    <div id="mallPage" class=" mallist tmall- page-not-market ">

        <!-- Header with the search box -->
        <div id="header" class=" header-list-app">
            <div class="headerLayout">
                <div class="headerCon ">
                    <!-- Logo -->
                    <h1 id="mallLogo">
                        <img th:src="@{/images/jdlogo.png}" alt="">
                    </h1>

                    <div class="header-extra">

                        <!-- Search form -->
                        <div id="mallSearch" class="mall-search">
                            <form name="searchTop" class="mallSearch-form clearfix">
                                <fieldset>
                                    <legend>天猫搜索</legend>
                                    <div class="mallSearch-input clearfix">
                                        <div class="s-combobox" id="s-combobox-685">
                                            <div class="s-combobox-input-wrap">
                                                <input v-model="keyword" type="text" autocomplete="off" value="dd" id="mq"
                                                       class="s-combobox-input" aria-haspopup="true">
                                            </div>
                                        </div>
                                        <button type="submit" @click.prevent="searchKey" id="searchbtn">搜索</button>
                                    </div>
                                </fieldset>
                            </form>
                            <ul class="relKeyTop">
                                <li><a>Java</a></li>
                                <li><a>前端</a></li>
                                <li><a>Linux</a></li>
                                <li><a>大数据</a></li>
                                <li><a>聊理财</a></li>
                            </ul>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <!-- Product listing area -->
        <div id="content">
            <div class="main">
                <!-- Brand filter -->
                <form class="navAttrsForm">
                    <div class="attrs j_NavAttrs" style="display:block">
                        <div class="brandAttr j_nav_brand">
                            <div class="j_Brand attr">
                                <div class="attrKey">
                                    品牌
                                </div>
                                <div class="attrValues">
                                    <ul class="av-collapse row-2">
                                        <li> this is test </li>
                                        <li> Java </li>
                                    </ul>
                                </div>
                            </div>
                        </div>
                    </div>
                </form>

                <!-- Sort options -->
                <div class="filter clearfix">
                    <a class="fSort fSort-cur">综合<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">人气<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">新品<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">销量<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">价格<i class="f-ico-triangle-mt"></i><i class="f-ico-triangle-mb"></i></a>
                </div>

                <!-- Product cards, one per search result -->
                <div class="view grid-nosku">

                    <div class="product" v-for="result in results">
                        <div class="product-iWrap">
                            <!-- Product cover image -->
                            <div class="productImg-wrap">
                                <a class="productImg">
                                    <img :src="result.img">
                                </a>
                            </div>
                            <!-- Price -->
                            <p class="productPrice">
                                <em>{{result.price}}</em>
                            </p>
                            <!-- Title; rendered with v-html because the backend embeds highlight <span> tags.
                                 NOTE(review): the title text originates from scraped pages, so this is raw HTML
                                 from an external source; confirm it is acceptable or sanitize it server-side. -->
                            <p class="productTitle">
                                <a v-html="result.title"></a>
                            </p>
                            <!-- Shop name -->
                            <div class="productShop">
                                <span>店铺: ZXWJava </span>
                            </div>
                            <!-- Sales information -->
                            <p class="productStatus">
                                <span>月成交<em>999笔</em></span>
                                <span>评价 <a>3</a></span>
                            </p>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>
<script th:src="@{/js/axios.min.js}"></script>
<script th:src="@{/js/vue.min.js}"></script>
<script>
    new Vue({
        el: '#app',
        data: {
            keyword: '', // search keyword bound to the input box
            results: []  // search results rendered as product cards
        },
        methods: {
            // Queries the backend search endpoint and stores the returned hits.
            searchKey(){
                var keyword = this.keyword.trim();
                // A blank keyword would produce the URL "search//2/10", which matches no route.
                if (!keyword) {
                    return;
                }
                // Encode the keyword so characters such as "?", "#" or "%" cannot corrupt the URL.
                // NOTE(review): page number (2) and page size (10) are hard-coded here.
                axios.get('search/'+encodeURIComponent(keyword)+"/2/10").then(
                    response=>{
                        this.results = response.data;
                    }
                ).catch(
                    error=>{
                        // Surface failures instead of leaving an unhandled promise rejection.
                        console.error(error);
                    }
                )
            }
        }
    })
</script>

</body>
</html>

成品展示

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值