第一种方式:游标(scroll)方式
String filePath = "E:\\output.txt"; // 输出文件路径
RestClientBuilder builder = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http"));
String indexName = "works";
try (RestHighLevelClient client = new RestHighLevelClient(builder)) {
String scrollId = null;
int size = 1000;
try {
for (; ; ) {
SearchResponse response;
if (null == scrollId) {
SearchSourceBuilder source = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()).size(size)
.fetchSource(new String[]{"id"}, null);
SearchRequest searchRequest = new SearchRequest(new String[]{indexName}, source);
searchRequest.scroll(TimeValue.timeValueMinutes(5));
response = client.search(searchRequest, RequestOptions.DEFAULT);
} else {
SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
searchScrollRequest.scroll(TimeValue.timeValueMinutes(5));
response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
}
if (null != response) {
scrollId = response.getScrollId();
SearchHits searchHits = response.getHits();
// 跳出
if (null == searchHits) {
break;
}
// 跳出
SearchHit[] hits = searchHits.getHits();
if (ArrayUtils.isEmpty(hits)) {
break;
}
// TODO 每条数据处理
List<String> lines = new ArrayList<>();
for (SearchHit hit : hits) {
JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
Object o = jsonObject.get("id");
lines.add(o.toString());
}
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) {
// 循环写入每一行
for (String line : lines) {
writer.write(line);
writer.newLine(); // 写入换行符
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
} finally {
// 清理游标
if (null != scrollId) {
ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
clearScrollRequest.addScrollId(scrollId);
client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
}
}
} catch (Exception e) {
System.out.println("===error==" + e.getMessage());
e.printStackTrace();
}
第二种方式:search_after 方式
String filePath = "C:\\Users\\admin\\Desktop\\output1.txt"; // 输出文件路径
// 初始化RestHighLevelClient
RestClientBuilder builder = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http"));
RestHighLevelClient client = new RestHighLevelClient(builder);
// 创建查询请求并设置索引名
SearchRequest searchRequest = new SearchRequest("works");
// 设置分页参数
int page = 1; // 当前页码
int pageSize = 10000; // 每页显示数量
int from = 0;
System.out.println("index:"+ page+";; from:"+from+";; pageSize:"+pageSize);
// 构建查询条件
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.queryStringQuery("status:1"));
searchSourceBuilder.from(from);
searchSourceBuilder.size(pageSize);
searchSourceBuilder.sort("id", SortOrder.DESC);
searchSourceBuilder.trackTotalHits(true);
searchRequest.source(searchSourceBuilder);
// 设置只获取特定字段
searchRequest.source().fetchSource(new String[]{"id"}, new String[]{});
// 执行查询
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
List<String> lines = new ArrayList<>();
SearchHits hits = searchResponse.getHits();
long value = hits.getTotalHits().value;
if(value == 0){
return;
}
SearchHit[] searchHists = hits.getHits();
Object[] sortValues = searchHists[searchHists.length - 1].getSortValues();
// 遍历查询结果
for (SearchHit hit : searchHists) {
JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
Object o = jsonObject.get("id");
lines.add(o.toString());
}
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) {
// 循环写入每一行
for (String line : lines) {
writer.write(line);
writer.newLine(); // 写入换行符
}
} catch (IOException e) {
e.printStackTrace();
}
//往后的每次请求都携带上一次的sort_id进行访问。
while (ArrayUtils.isNotEmpty(searchHists)){
page++;
lines = new ArrayList<>();
from = (page - 1) * pageSize;
System.out.println("index:"+ page+";; from:"+from+";; pageSize:"+pageSize+";;time:"+ DateUtil.format(new Date(), "YYYY-MM-dd HH:mm:ss"));
searchSourceBuilder.searchAfter(sortValues);
// searchSourceBuilder.from(from);
// searchSourceBuilder.size(pageSize);
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponseAfter = client.search(searchRequest, RequestOptions.DEFAULT);
hits = searchResponseAfter.getHits();
long value1 = hits.getTotalHits().value;
if(value1 == 0){
System.out.println("-----彻底结束了-------");
break;
}
searchHists = hits.getHits();
if(searchHists.length == 0){
System.out.println("-----彻底结束了-------");
break;
}
sortValues = searchHists[searchHists.length - 1].getSortValues();
for (SearchHit hit : searchHists) {
JSONObject jsonObject = JSONObject.parseObject(hit.getSourceAsString());
Object o = jsonObject.get("id");
lines.add(o.toString());
}
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) {
// 循环写入每一行
for (String line : lines) {
writer.write(line);
writer.newLine(); // 写入换行符
}
} catch (IOException e) {
e.printStackTrace();
}
}
// 关闭客户端
client.close();
结论:根据实际跑数据的统计结果,第一种方式(scroll 游标)的效率更高。