ES查询百万数据，分批写入CSV

最新推荐文章于 2023-12-06 10:40:28 发布

　Admin

最新推荐文章于 2023-12-06 10:40:28 发布

阅读量4.0k

点赞数

文章标签： dreamweaver

本文链接：https://blog.csdn.net/weixin_43618209/article/details/132870827

版权

BOM 头信息必须在写入任何文件内容之前写入，即作为文件最开头的几个字节；

否则用微软 Excel 打开含有汉字的 CSV 文件时会出现乱码；

 /**
     * Exports the documents of the {@code twitter} index (type {@code tweet}) to a CSV file
     * using the scroll API.
     *
     * <p>The file starts with a UTF-8 byte order mark so that Microsoft Excel detects the
     * encoding, followed by a header line of field names and then one line per hit.
     * Fields within a line are separated by ",".
     *
     * <p>Scrolling continues until a page comes back with no hits, so a final partial page
     * is exported as well.
     *
     * @throws IOException if the CSV file cannot be created or written
     */
    @Test
    public void scroll() throws IOException {
        int size = 10000;  // number of hits requested per scroll page
        // Target directory for the export, located under the classpath root.
        String path = this.getClass().getResource("/").getPath()+"download/";
        File file = new File(path);
        if(!file.exists()){
            file.mkdirs();
        }
        String fileName = "scrollOutputTest.csv";
        // One keep-alive for every request: it only has to cover the processing of a single page.
        TimeValue keepAlive = new TimeValue(20000);

        long startTime = System.currentTimeMillis();    // start of the measured run
        // Write explicitly as UTF-8 (FileWriter charset constructor, Java 11+); the platform
        // default charset would corrupt both the BOM and non-ASCII text on non-UTF-8 systems.
        try (FileWriter fw = new FileWriter(path + fileName, java.nio.charset.StandardCharsets.UTF_8)) {
            // Excel relies on the BOM at the very start of the file to recognise the encoding;
            // U+FEFF is encoded as the bytes EF BB BF in UTF-8.
            fw.write('\uFEFF');

            SearchResponse response = client.prepareSearch("twitter").setTypes("tweet")
                    .addSort(SortBuilders.fieldSort("_doc"))
                    .setSize(size).setScroll(keepAlive).execute()
                    .actionGet();
            String scrollId = response.getScrollId();
            // Total number of matching documents, reported for information only.
            long totalCount = response.getHits().getTotalHits();
            System.out.println("totalCount:" + totalCount);

            // Header line with the field names; skipped when the search matched nothing.
            if (response.getHits().getHits().length > 0) {
                fw.write(getFields(response));
            }

            // Keep requesting pages with the most recent scroll id until an empty page
            // signals the end; this also exports the last, partially filled page.
            while (response.getHits().getHits().length > 0) {
                scrollOutputToCsv(response, fw);
                response = client.prepareSearchScroll(scrollId)
                        .setScroll(keepAlive).execute()
                        .actionGet();
                scrollId = response.getScrollId();
            }

            // Release the server-side scroll context instead of waiting for it to expire.
            // If an exception interrupts the export, the context expires after the keep-alive.
            if (scrollId != null) {
                client.prepareClearScroll().addScrollId(scrollId).execute().actionGet();
            }
        }
        long endTime = System.currentTimeMillis();    // end of the measured run
        System.out.println("运行时间："+(endTime-startTime)+"ms");
    }

/**
     * Builds the CSV header line from the field names of the first hit in the response.
     *
     * @param response search response whose first hit supplies the field names
     * @return the field names joined with ",", or an empty string when the response
     *         contains no hits or the first hit has no source fields
     */
    public String getFields(SearchResponse response){
        // Check the hits actually present in this response rather than the total count,
        // so getAt(0) is only called when there is a first hit.
        if (response.getHits().getHits().length == 0) {
            return "";
        }
        Map<String,Object> source = response.getHits().getAt(0).getSourceAsMap();
        if (source == null || source.isEmpty()) {
            return "";
        }
        // Joining avoids the trailing-comma trim that failed on an empty string.
        return String.join(",", source.keySet());
    }

    /**
     * Appends every hit of the given response to the writer as one CSV line each.
     *
     * <p>Each line is preceded by "\r\n" (so it follows the header or the previous line) and
     * holds the hit's source values in the iteration order of its source map, separated
     * by ",". Null values become empty fields, and values containing a comma, a double
     * quote or a line break are quoted so they stay within one column.
     * The writer is flushed once after all hits have been written.
     *
     * @param response search response (one scroll page) whose hits are written
     * @param fw       writer for the CSV file; left open for the caller to close
     * @throws IOException if writing or flushing fails
     */
    public static void scrollOutputToCsv(SearchResponse response,FileWriter fw) throws IOException {
        int hitCount = response.getHits().getHits().length;
        for (int i = 0; i < hitCount; i++) {
            Map<String,Object> source = response.getHits().getAt(i).getSourceAsMap();
            StringBuilder row = new StringBuilder("\r\n");
            if (source != null) {
                boolean first = true;
                for (Object value : source.values()) {
                    if (!first) {
                        row.append(',');
                    }
                    first = false;
                    // A null value is written as an empty field instead of failing.
                    row.append(csvEscape(value == null ? "" : value.toString()));
                }
            }
            fw.write(row.toString());
        }
        fw.flush();
    }

    /** Quotes a CSV field when it contains a comma, double quote or line break; doubles embedded quotes. */
    private static String csvEscape(String field) {
        boolean needsQuoting = field.indexOf(',') >= 0
                || field.indexOf('"') >= 0
                || field.indexOf('\n') >= 0
                || field.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return field;
        }
        return "\"" + field.replace("\"", "\"\"") + "\"";
    }

下载