ElasticSearch入门学习笔记(三)SpringBoot整合篇

一、项目所需

1.1导入依赖

<properties>
    <java.version>1.8</java.version>
    <!-- Pin the Elasticsearch version in one place (7.6.1) -->
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>

导入elasticsearch

<!-- Spring Boot starter for Elasticsearch; no version given here, so it is expected to be managed elsewhere in the build -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>

提前导入fastjson、lombok

<!-- fastjson: used by the examples to serialize objects to JSON strings -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.70</version>
</dependency>
<!-- lombok: requires the IDE plugin to be installed -->
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>

1.2创建并编写配置类–>连上ES

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchConfig {

    /**
     * Registers a {@link RestHighLevelClient} pointing at 127.0.0.1:9200 over HTTP.
     *
     * @return the newly built client
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

1.3测试索引的操作

1、索引的创建
	/**
     * Creates the {@code jd_goods} index and prints the response.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testCreateIndex() throws IOException {
        // Build the create-index request and send it through the indices client.
        CreateIndexRequest createRequest = new CreateIndexRequest("jd_goods");
        CreateIndexResponse response = restHighLevelClient
                .indices()
                .create(createRequest, RequestOptions.DEFAULT);
        System.out.println("执行创建请求===>" + response);
    }
2、测试获取索引
	/**
     * Checks whether the {@code kuang_index} index exists and prints the boolean result.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testExistIndex() throws IOException {
        GetIndexRequest lookup = new GetIndexRequest("kuang_index");
        boolean found = restHighLevelClient
                .indices()
                .exists(lookup, RequestOptions.DEFAULT);
        System.out.println("测试获取索引===>" + found);
    }
3、测试删除索引
	/**
     * Deletes the {@code kuang_index} index and prints whether the cluster acknowledged it.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testDeleteIndex() throws IOException {
        DeleteIndexRequest request = new DeleteIndexRequest("kuang_index");
        AcknowledgedResponse delete = restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
        // Print the acknowledged flag: the label asks "was the deletion successful?",
        // which the response object itself does not answer when printed.
        System.out.println("是否删除成功===>" + delete.isAcknowledged());
    }
4、测试添加文档
	/**
     * Indexes one {@code User} document with id {@code 1} into {@code kuang_index}
     * and prints the response and its status.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testAddDocument() throws IOException {
        // Object to be stored
        User user = new User("狂神说", 3);
        // Request targeting the index
        IndexRequest request = new IndexRequest("kuang_index");

        // Rules: document id and a 1 second timeout.
        // The timeout can equally be written as request.timeout("1s"); setting it once is enough.
        request.id("1");
        request.timeout(TimeValue.timeValueSeconds(1));

        // Put the data into the request as a JSON string
        request.source(JSON.toJSONString(user), XContentType.JSON);

        // Send the request and read the response
        IndexResponse indexResponse = restHighLevelClient.index(request, RequestOptions.DEFAULT);

        System.out.println(indexResponse.toString());
        // Status returned for the operation (the original note mentions CREATED)
        System.out.println(indexResponse.status());
    }
5、获取文档 判断是否存在
	/**
     * Checks whether document {@code 1} exists in {@code kuang_index} and prints the result.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testIsExists() throws IOException {
        GetRequest probe = new GetRequest("kuang_index", "1");
        // Do not fetch _source or stored fields; only existence matters here.
        FetchSourceContext noSource = new FetchSourceContext(false);
        probe.fetchSourceContext(noSource);
        probe.storedFields("_none_");

        boolean present = restHighLevelClient.exists(probe, RequestOptions.DEFAULT);
        System.out.println(present);
    }
6、获取文档的信息
	/**
     * Fetches document {@code 1} from {@code kuang_index} and prints its source
     * followed by the full response.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testGetDocument() throws IOException {
        GetRequest getRequest = new GetRequest("kuang_index", "1");
        GetResponse documentFields = restHighLevelClient.get(getRequest, RequestOptions.DEFAULT);
        // Print the document content (the _source as a JSON string), not the request object.
        System.out.println(documentFields.getSourceAsString());
        System.out.println(documentFields);
    }
7、更新文档的信息
	/**
     * Partially updates document {@code 1} in {@code kuang_index} and prints the response status.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testUpdateRequest() throws IOException {
        // Target the same index the other document tests use, so the update
        // hits the document that testAddDocument created.
        UpdateRequest updateRequest = new UpdateRequest("kuang_index", "1");
        updateRequest.timeout("1s");
        User user = new User("狂神说java", 18);
        updateRequest.doc(JSON.toJSONString(user), XContentType.JSON);
        UpdateResponse updateResponse = restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);

        System.out.println(updateResponse.status());
    }
8、删除文档记录
	/**
     * Deletes document {@code 2} from {@code kuang_index} and prints the response status.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testDeleteRequest() throws IOException {
        DeleteRequest removal = new DeleteRequest("kuang_index", "2");
        removal.timeout("1s");

        DeleteResponse outcome = restHighLevelClient.delete(removal, RequestOptions.DEFAULT);
        System.out.println(outcome.status());
    }
9、批量插入数据
	/**
     * Bulk-indexes seven {@code User} documents into {@code kuang_index} with ids 1..7
     * and prints whether the bulk response reports failures.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testBulkRequest() throws IOException {
        BulkRequest batch = new BulkRequest();
        batch.timeout("10s");

        ArrayList<User> users = new ArrayList<>();
        users.add(new User("kuangshen1", 3));
        users.add(new User("kuangshen2", 4));
        users.add(new User("kuangshen3", 5));
        users.add(new User("kuangshen4", 6));
        users.add(new User("kuangshen5", 13));
        users.add(new User("kuangshen6", 23));
        users.add(new User("kuangshen7", 33));

        // One index action per user; ids are 1-based positions in the list.
        int docId = 1;
        for (User user : users) {
            IndexRequest action = new IndexRequest("kuang_index");
            action.id(String.valueOf(docId));
            action.source(JSON.toJSONString(user), XContentType.JSON);
            batch.add(action);
            docId++;
        }

        BulkResponse result = restHighLevelClient.bulk(batch, RequestOptions.DEFAULT);
        // false means no item failed
        System.out.println(result.hasFailures());
    }
10、查询
	/**
     * Runs a term query on the {@code name} field against {@code ESconst.ES_INDEX}
     * and prints the hits, first as JSON and then one source map per hit.
     *
     * @throws IOException propagated from the client call
     */
    @Test
    void testSearch() throws IOException {
        SearchRequest searchRequest = new SearchRequest(ESconst.ES_INDEX);
        // Build the search conditions
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // Note: the former bare sourceBuilder.highlighter() call was removed; it only
        // reads the current highlighter and its result was discarded, so it configured nothing.

        // Query conditions can be built with the QueryBuilders helper:
        //   QueryBuilders.termQuery()     exact match
        //   QueryBuilders.matchAllQuery() match everything
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "qinjiang1");

        // Built only to show what a match-all query looks like when printed; not sent.
        MatchAllQueryBuilder allQueryBuilder = QueryBuilders.matchAllQuery();
        System.out.println("allQueryBuilder===>>" + allQueryBuilder);

        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Attach the source to the request
        searchRequest.source(sourceBuilder);

        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        System.out.println(JSON.toJSONString(searchResponse.getHits()));
        System.out.println("=====================================");
        for (SearchHit documentFields : searchResponse.getHits().getHits()) {
            System.out.println(documentFields.getSourceAsMap());
        }
    }

二、ElasticSearch实战

2.1导入依赖

<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>
<dependencies>
    <!-- jsoup: HTML page parsing -->
    <!-- For scraping video content, look into "tiko" (presumably Apache Tika; confirm) -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    <!-- fastjson -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.70</version>
    </dependency>
    <!-- ElasticSearch -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- thymeleaf -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>
    <!-- web -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- devtools: hot reload during development -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-devtools</artifactId>
        <scope>runtime</scope>
        <optional>true</optional>
    </dependency>
    <!-- Spring Boot configuration processor -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- lombok: requires the IDE plugin to be installed -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- test -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
</dependencies>

2.2编写 application.properties配置文件

# Change the server port to avoid conflicts
server.port=9999
# Disable the Thymeleaf template cache
spring.thymeleaf.cache=false

2.3测试controller和view

/**
 * MVC controller that serves the landing page.
 */
@Controller
public class IndexController {

    /** View name returned for the landing page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for the paths declared in the mapping below.
     *
     * @return the view name {@code index}
     */
    @GetMapping({"/", "index"})
    public String index() {
        return INDEX_VIEW;
    }
}

三、爬虫京东的数据到ES

3.1编写Config

/**
 * Spring configuration providing the Elasticsearch client used by the scraper project.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a {@link RestHighLevelClient} for the node at 127.0.0.1:9200 (HTTP).
     *
     * @return the client bean
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}

3.2编写service

因为数据是爬取来的,所以不走Dao层;以下示例为了简便都没有编写接口,但实际开发中必须严格按规范编写接口

ContentService

	/**
     * 1. Scrapes items for the given keyword and bulk-indexes them into the {@code jd_goods} index.
     *
     * <p>Document ids are the 1-based positions in the scraped list, so a later call
     * overwrites documents written by an earlier call at the same positions.
     *
     * @param keyword search keyword passed to the scraper
     * @return {@code true} if the bulk request completed without failures; {@code false} if
     *         nothing was scraped or at least one item failed
     * @throws Exception propagated from the scraper or the client call
     */
    public Boolean parseContents(String keyword) throws Exception {
        // Fetch the content
        List<Content> contents = new HtmlParseUtil().params(keyword);
        // A bulk request with no actions is rejected by request validation, so bail out early.
        if (contents == null || contents.isEmpty()) {
            return false;
        }
        // Put the content into ES
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // adjust to actual business needs
        for (int i = 0; i < contents.size(); i++) {
            bulkRequest.add(
                    new IndexRequest("jd_goods")
                            .id("" + (i + 1))
                            .source(JSON.toJSONString(contents.get(i)), XContentType.JSON)
            );
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        // Do NOT close the client here: it is presumably the shared bean from the configuration
        // class, and closing it would make every later request (including search) fail.
        return !bulk.hasFailures();
    }
    /**
     * 2. Searches {@code jd_goods} by keyword with paging and returns the hits with the
     * {@code title} field replaced by its highlighted version.
     *
     * @param keywords phrase matched against the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return one source map per hit, with {@code title} carrying the highlight markup when available
     * @throws IOException propagated from the client call
     */
    public List<Map<String,Object>> highlightBuilder(String keywords, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }

        // Conditional search
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number, so convert the
        // 1-based page number into the index of the first hit of that page.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // A term query does not analyze its input, but the title is tokenized (IK analyzer
        // by default), so a term query is not suitable here; use a phrase match instead.
        MatchPhraseQueryBuilder phraseQuery = QueryBuilders.matchPhraseQuery("title", keywords);

        sourceBuilder.query(phraseQuery);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlighting
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.requireFieldMatch(false); // per the original note: highlight multiple matches
        highlightBuilder.field("title");
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        // Execute the search
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Parse the results
        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, HighlightField> fields = hit.getHighlightFields();
            HighlightField title = fields.get("title");
            Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // original result
            // Replace the original title with the concatenated highlighted fragments
            if (title != null) {
                StringBuilder newTitle = new StringBuilder();
                for (Text text : title.fragments()) {
                    newTitle.append(text);
                }
                sourceAsMap.put("title", newTitle.toString());
            }
            list.add(sourceAsMap);
        }
        return list;
    }

编写controller

	@Autowired
    private ContentService contentService;
    /**
     * Triggers scraping and indexing for the keyword in the path and returns the result as the response body.
     *
     * @param keywords keyword taken from the URL path
     * @return the value returned by {@code contentService.parseContents}
     * @throws Exception propagated from the service
     */
    @ResponseBody
    @GetMapping("/parse/{keywords}")
    public Boolean parses(@PathVariable("keywords") String keywords) throws Exception {
        Boolean indexed = contentService.parseContents(keywords);
        return indexed;
    }

    /**
     * Searches indexed items by keyword with paging and highlighted titles.
     *
     * @param keywords keyword taken from the URL path
     * @param pageNo   page number taken from the URL path
     * @param pageSize page size taken from the URL path
     * @return the hits returned by {@code contentService.highlightBuilder}
     * @throws IOException propagated from the service
     */
    @ResponseBody
    @GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keywords") String keywords,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {
        // Forward the requested paging values instead of the hard-coded 1 and 10.
        return contentService.highlightBuilder(keywords, pageNo, pageSize);
    }

3.2爬虫工具类

/**
 * Scraper utility that fetches a JD search result page and extracts basic item data from it.
 */
public class HtmlParseUtil {
    /**
     * Manual smoke test: scrapes one keyword and prints every parsed item.
     *
     * @param args unused
     * @throws Exception propagated from {@link #params(String)}
     */
    public static void main(String[] args) throws Exception {
        new HtmlParseUtil().params("码出高效").forEach(System.out::println);
    }

    /**
     * Fetches the JD search page for the given keyword and parses the listed items.
     *
     * @param keywords search keyword; it is URL-encoded before being placed in the query string
     * @return the parsed items (image, price, title); empty if the page has no
     *         {@code J_goodsList} element
     * @throws Exception if encoding the keyword or fetching/parsing the page fails
     */
    public List<Content> params(String keywords) throws Exception {
        // Request shape: https://search.jd.com/Search?keyword=java (network access required).
        // Encode the keyword so spaces, '&' and non-ASCII text cannot corrupt the query string.
        String encodedKeyword = java.net.URLEncoder.encode(keywords, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";
        // Fetch and parse the page (30 second timeout)
        Document document = Jsoup.parse(new URL(url), 30000);

        ArrayList<Content> goodList = new ArrayList<>();

        // The DOM-style lookups available in browser JS are available here as well
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // Layout changed or the request was redirected: nothing to parse.
            return goodList;
        }
        // All <li> elements of the goods list
        Elements elements = element.getElementsByTag("li");

        for (Element el : elements) {
            // Image-heavy sites lazy-load pictures; the real URL is in data-lazy-img
            String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String title = el.getElementsByClass("p-name").eq(0).text();
            Content content = new Content();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            goodList.add(content);
        }
        return goodList;
    }
}

3.3测试效果

在这里插入图片描述

待解决问题:

1、分词时 自定义分词 需手动建.dic 添加;

2、轮询时间如何设置为秒;

ElasticSearch入门学习笔记(一)概念篇
ElasticSearch入门学习笔记(二)软件安装篇
SpringBoot集成BBOSS-ElasticSearch实现ElasticSearch客户端
阿里云Docker安装ES\ES_Head\安装部署logstash导mysql数据入ElasticSearch
在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

java亮小白1997

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值