Elasticsearch 查询结果超过 10000 条的问题(from + size 深度分页受 index.max_result_window 默认值 10000 限制)

第一种方式:游标(scroll)方式

String filePath = "E:\\output.txt"; // 输出文件路径
        RestClientBuilder builder = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http"));
        String indexName = "works";
        try (RestHighLevelClient client = new RestHighLevelClient(builder)) {
            String scrollId = null;
            int size = 1000;
            try {
                for (; ; ) {
                    SearchResponse response;
                    if (null == scrollId) {
                        SearchSourceBuilder source = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()).size(size)
                                .fetchSource(new String[]{"id"}, null);
                        SearchRequest searchRequest = new SearchRequest(new String[]{indexName}, source);
                        searchRequest.scroll(TimeValue.timeValueMinutes(5));
                        response = client.search(searchRequest, RequestOptions.DEFAULT);
                    } else {
                        SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
                        searchScrollRequest.scroll(TimeValue.timeValueMinutes(5));
                        response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
                    }

                    if (null != response) {
                        scrollId = response.getScrollId();
                        SearchHits searchHits = response.getHits();

                        // 跳出
                        if (null == searchHits) {
                            break;
                        }

                        // 跳出
                        SearchHit[] hits = searchHits.getHits();
                        if (ArrayUtils.isEmpty(hits)) {
                            break;
                        }

                        // TODO 每条数据处理
                        List<String> lines = new ArrayList<>();
                        for (SearchHit hit : hits) {
                            JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
                            Object o = jsonObject.get("id");
                            lines.add(o.toString());
                        }
                        try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) {
                            // 循环写入每一行
                            for (String line : lines) {
                                writer.write(line);
                                writer.newLine(); // 写入换行符
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }

                }
            } finally {
                // 清理游标
                if (null != scrollId) {
                    ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
                    clearScrollRequest.addScrollId(scrollId);
                    client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
                }
            }
        } catch (Exception e) {
            System.out.println("===error==" + e.getMessage());
            e.printStackTrace();
        }

 

第二种方式:search_after 方式

String filePath = "C:\\Users\\admin\\Desktop\\output1.txt"; // 输出文件路径

        // 初始化RestHighLevelClient
        RestClientBuilder builder = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http"));
        RestHighLevelClient client = new RestHighLevelClient(builder);
        // 创建查询请求并设置索引名
        SearchRequest searchRequest = new SearchRequest("works");

        // 设置分页参数
        int page = 1; // 当前页码
        int pageSize = 10000; // 每页显示数量
        int from = 0;
        System.out.println("index:"+ page+";; from:"+from+";; pageSize:"+pageSize);
        // 构建查询条件
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(QueryBuilders.queryStringQuery("status:1"));
        searchSourceBuilder.from(from);
        searchSourceBuilder.size(pageSize);
        searchSourceBuilder.sort("id", SortOrder.DESC);
        searchSourceBuilder.trackTotalHits(true);

        searchRequest.source(searchSourceBuilder);

        // 设置只获取特定字段
        searchRequest.source().fetchSource(new String[]{"id"}, new String[]{});

        // 执行查询
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

        List<String> lines = new ArrayList<>();
        SearchHits hits = searchResponse.getHits();
        long value = hits.getTotalHits().value;
        if(value == 0){
            return;
        }
        SearchHit[] searchHists = hits.getHits();
        Object[] sortValues = searchHists[searchHists.length - 1].getSortValues();
        // 遍历查询结果
        for (SearchHit hit : searchHists) {
            JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
            Object o = jsonObject.get("id");
            lines.add(o.toString());
        }
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) {
            // 循环写入每一行
            for (String line : lines) {
                writer.write(line);
                writer.newLine(); // 写入换行符
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        //往后的每次请求都携带上一次的sort_id进行访问。
        while (ArrayUtils.isNotEmpty(searchHists)){
            page++;
            lines = new ArrayList<>();
            from = (page - 1) * pageSize;
            System.out.println("index:"+ page+";; from:"+from+";; pageSize:"+pageSize+";;time:"+ DateUtil.format(new Date(), "YYYY-MM-dd HH:mm:ss"));
            searchSourceBuilder.searchAfter(sortValues);
//            searchSourceBuilder.from(from);
//            searchSourceBuilder.size(pageSize);
            searchRequest.source(searchSourceBuilder);
            SearchResponse searchResponseAfter = client.search(searchRequest, RequestOptions.DEFAULT);
            hits = searchResponseAfter.getHits();
            long value1 = hits.getTotalHits().value;
            if(value1 == 0){
                System.out.println("-----彻底结束了-------");
                break;
            }
            searchHists = hits.getHits();
            if(searchHists.length == 0){
                System.out.println("-----彻底结束了-------");
                break;
            }
            sortValues = searchHists[searchHists.length - 1].getSortValues();
            for (SearchHit hit : searchHists) {
                JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
                Object o = jsonObject.get("id");
                lines.add(o.toString());
            }
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) {
                // 循环写入每一行
                for (String line : lines) {
                    writer.write(line);
                    writer.newLine(); // 写入换行符
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // 关闭客户端
        client.close();

根据实际跑数据的统计结果:第一种方式(scroll)的效率更高。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值