/**
 * Exports the documents of the "twitter" index (type "tweet") to a CSV file via the scroll API.
 *
 * <p>The first line of the file holds the field names returned by {@code getFields}; every
 * following line holds one document, with values separated by ",". The file is written as
 * "scrollOutputTest.csv" into a "download/" directory below the classpath root.
 *
 * @throws IOException if the CSV file cannot be created or written
 */
@Test
public void scroll() throws IOException {
    int size = 10000; // number of documents fetched per scroll request
    // Create the output directory if it does not exist yet.
    String path = this.getClass().getResource("/").getPath() + "download/";
    File file = new File(path);
    if (!file.exists()) {
        file.mkdirs();
    }
    String fileName = "scrollOutputTest.csv";
    // The same keep-alive is used for every request: it must outlast the time needed to
    // write one batch to disk, for the first batch as much as for the later ones.
    TimeValue keepAlive = new TimeValue(20000);
    long startTime = System.currentTimeMillis(); // start of the timed section
    // try-with-resources closes the writer even when a search request or a write fails.
    try (FileWriter fw = new FileWriter(path + fileName)) {
        // Microsoft Excel relies on a byte-order mark to detect the encoding, so it is written
        // first. FileWriter encodes with the JVM default charset, so the mark (and the file)
        // is UTF-8 only when that default is UTF-8.
        fw.write('\uFEFF');
        SearchResponse response = client.prepareSearch("twitter").setTypes("tweet")
                .addSort(SortBuilders.fieldSort("_doc"))
                .setSize(size).setScroll(keepAlive).execute()
                .actionGet();
        // Total number of matching documents.
        long totalCount = response.getHits().getTotalHits();
        System.out.println("totalCount:" + totalCount);
        // Write the header line only when there is a first hit to take the field names from.
        if (response.getHits().getHits().length > 0) {
            fw.write(getFields(response));
        }
        // Keep scrolling until a page comes back empty. Deriving the number of requests from
        // totalCount / size would skip the last, partially filled page.
        while (response.getHits().getHits().length > 0) {
            scrollOutputToCsv(response, fw);
            // Request the next page with the scroll id of the previous response.
            response = client.prepareSearchScroll(response.getScrollId())
                    .setScroll(keepAlive).execute()
                    .actionGet();
        }
    }
    long endTime = System.currentTimeMillis(); // end of the timed section
    System.out.println("运行时间:"+(endTime-startTime)+"ms");
}
/**
 * Builds the CSV header line from the field names of the first hit in the response.
 *
 * @param response search response whose first hit supplies the field names
 * @return the keys of the first hit's source map joined with ",", in the map's iteration
 *         order; an empty string when the response holds no hits or the map has no keys
 */
public String getFields(SearchResponse response){
    // Check the hits actually present on this page: getAt(0) needs at least one of them.
    if (response.getHits().getHits().length == 0) {
        return "";
    }
    Map<String,Object> map = response.getHits().getAt(0).getSourceAsMap();
    StringBuilder fields = new StringBuilder();
    boolean first = true;
    for (String key : map.keySet()) {
        // Put the separator between names only, so nothing has to be trimmed afterwards.
        if (!first) {
            fields.append(',');
        }
        first = false;
        fields.append(key);
    }
    return fields.toString();
}
/**
 * Appends every hit of the given response to the CSV writer, one line per hit.
 *
 * <p>Each line starts with "\r\n" and contains the values of the hit's source map separated
 * by ",". A null value is written as an empty cell. A value containing a comma, a double
 * quote or a line break is enclosed in double quotes, with embedded quotes doubled, so that
 * it stays a single cell. The writer is flushed once after all hits have been written.
 *
 * <p>NOTE(review): values are written in each hit's own map iteration order, so columns line
 * up across rows only if all hits expose the same fields in the same order.
 *
 * @param response search response whose hits are written
 * @param fw       writer the rows are appended to; it is flushed but not closed
 * @throws IOException if writing to or flushing {@code fw} fails
 */
public static void scrollOutputToCsv(SearchResponse response,FileWriter fw) throws IOException {
    int hitCount = response.getHits().getHits().length;
    for (int i = 0; i < hitCount; i++) {
        Map<String,Object> map = response.getHits().getAt(i).getSourceAsMap();
        StringBuilder row = new StringBuilder("\r\n");
        boolean first = true;
        for (Object value : map.values()) {
            // Put the separator between cells only, so nothing has to be trimmed afterwards.
            if (!first) {
                row.append(',');
            }
            first = false;
            String cell = value == null ? "" : value.toString();
            boolean needsQuoting = cell.indexOf(',') >= 0 || cell.indexOf('"') >= 0
                    || cell.indexOf('\n') >= 0 || cell.indexOf('\r') >= 0;
            if (needsQuoting) {
                cell = "\"" + cell.replace("\"", "\"\"") + "\"";
            }
            row.append(cell);
        }
        fw.write(row.toString());
    }
    fw.flush();
}
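// Note: the BOM header must be written before the file content is written on the server.
// This code only writes the CSV file on the server; downloading it from the server to the local machine is a separate step.
// For the full procedure see the blog post: https://blog.csdn.net/qq_34624315/article/details/81537413
// Follow my WeChat official account for regular updates on Python, Java, algorithms and related topics!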