Reading data remotely from an Elasticsearch database and exporting it

I recently finished my thesis proposal defense. My graduation project uses machine learning algorithms to detect anomalies in electromagnetic data. All of the electromagnetic data is stored in a distributed Elasticsearch cluster, so the first step is to export it. Over the past couple of days I wrote the program below, which has already exported part of the data.

package org.elasticsearch.esTest;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;

// Dependencies are managed with Maven.
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.search.SearchHits;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

/**
 * Exports documents from a remote Elasticsearch index into local text files,
 * split by eventType (trace / warning / other).
 */
public class EsClient {
    static File trace = new File("E:/es data/emcas-2018.01.04_trace.txt");
    static File warning = new File("E:/es data/emcas-2018.01.04_warning.txt");
    static File other = new File("E:/es data/emcas-2018.01.04_other.txt");

    public static Client getClient() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("cluster.name", "estest1")
                .build();
        TransportClient client = new TransportClient(settings).addTransportAddress(
                new InetSocketTransportAddress("10.10.41.153", 9300));
        return client;
    }


    public static HashSet<String> write2File(Client client) throws IOException {
        long start = System.currentTimeMillis();
        int scrollSize = 1000;
        SearchResponse response = null;

        BufferedWriter bfw1 = new BufferedWriter(new FileWriter(trace));
        BufferedWriter bfw2 = new BufferedWriter(new FileWriter(warning));
        BufferedWriter bfw3 = new BufferedWriter(new FileWriter(other)); // was FileWriter(trace), which clobbered the trace output

        // Distinct collectorId values seen in trace events.
        HashSet<String> collectid = new HashSet<String>();

        // The counters must survive across pages, so declare them outside the loop.
        int trace_count = 0;
        int warning_count = 0;
        int other_count = 0;

        int i = 0;
        // Page through the index with from/size; note the i <= 1 cap stops the export after two pages.
        while (response == null || (response.getHits().hits().length != 0 && i <= 1)) {
            try {
                response = client.prepareSearch("emcas-2017.10.16")
                        .setQuery(QueryBuilders.matchAllQuery())
                        .setSize(scrollSize)
                        .setFrom(i * scrollSize)
                        .execute()
                        .actionGet();
            } catch (IndexMissingException e) {
                System.out.println("index not found");
                break; // no response to read from
            }

            SearchHits hits = response.getHits();

            for (int j = 0; j < hits.getHits().length; j++) {
                String jsonstr = hits.getHits()[j].getSourceAsString();
                JSONObject json_1 = JSON.parseObject(jsonstr);
                System.out.println(json_1);

                if ("trace".equals(json_1.getString("eventType"))) {
                    trace_count++;
                    collectid.add(json_1.getString("collectorId"));
                    // Roll over to a new file every 100,000 trace records,
                    // closing the previous writer instead of leaking it.
                    if (trace_count % 100000 == 0) {
                        bfw1.close();
                        bfw1 = new BufferedWriter(new FileWriter(autoCreateFile(trace_count / 100000)));
                    }
                    bfw1.write(json_1.toString() + '\r');
                    bfw1.flush();
                } else if ("warning".equals(json_1.getString("eventType"))) {
                    warning_count++;
                    // Roll over to a new file every 100 warning records.
                    if (warning_count % 100 == 0) {
                        bfw2.close();
                        bfw2 = new BufferedWriter(new FileWriter(autoCreateFile(warning_count / 100)));
                    }
                    bfw2.write(json_1.toString() + '\r');
                    bfw2.flush();
                } else {
                    other_count++;
                    // Roll over to a new file every 100 "other" records.
                    if (other_count % 100 == 0) {
                        bfw3.close();
                        bfw3 = new BufferedWriter(new FileWriter(autoCreateFile(other_count / 100)));
                    }
                    bfw3.write(json_1.toString() + '\r');
                    bfw3.flush();
                }
            }
            i++;
        }
        // Closing the BufferedWriters also closes the underlying FileWriters.
        bfw1.close();
        bfw2.close();
        bfw3.close();
        long end = System.currentTimeMillis();
        System.out.println("Total time elapsed (ms): " + (end - start));
        return collectid;
    }



    // Creates a numbered output file. Note that all three event types share this
    // naming scheme, so rolled-over files from different types can overwrite each other.
    public static File autoCreateFile(int i) throws IOException {
        File file = new File("E:/es data/" + i + ".txt");
        file.createNewFile();
        return file;
    }




    public static void main(String[] args) throws IOException {
        Client client = getClient();
        HashSet<String> collectorIds = write2File(client);
        for (String id : collectorIds) {
            System.out.println(id);
        }
        System.out.println("distinct collectorIds: " + collectorIds.size());
        client.close();
//      GetResponse response = client.prepareGet("emcas-2017.10.18", "trace", "AV8tK5NeSBmsIUk260HQ")
//      GetResponse response = client.prepareGet("emcas-2017.10.18", "status", "4")
//              .execute()
//              .actionGet();
//      System.out.println(response.getSource());
//      Count the total number of docs in one index:
//      SearchResponse response2 = client.prepareSearch("emcas-2018.01.04")
//                  .setQuery(QueryBuilders.matchAllQuery())
//                  .setSize(0)
//                  .execute()
//                  .actionGet();
//      SearchHits hits = response2.getHits();
//      long hitscount = hits.getTotalHits();
//      System.out.println(hitscount);
    }
}
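
A note on pagination: from/size paging re-executes the whole query for every page and gets expensive deep into an index, so exporting an entire index is usually done with the scroll API instead. Below is a minimal sketch of the 1.x-era scroll pattern, assuming the same TransportClient as above; the index name, page size, and timeout are placeholder values.

```java
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;

public class ScrollExport {
    // Streams every document of an index via the scroll API (ES 1.x client).
    public static void scrollAll(Client client) {
        SearchResponse resp = client.prepareSearch("emcas-2017.10.16") // placeholder index name
                .setSearchType(SearchType.SCAN)          // SCAN: first response carries no hits, only a scroll id
                .setScroll(new TimeValue(60000))         // keep the scroll context alive for 60 s between calls
                .setQuery(QueryBuilders.matchAllQuery())
                .setSize(1000)                           // hits per shard per round trip
                .execute().actionGet();
        while (true) {
            resp = client.prepareSearchScroll(resp.getScrollId())
                    .setScroll(new TimeValue(60000))
                    .execute().actionGet();
            if (resp.getHits().getHits().length == 0) {
                break; // scroll exhausted
            }
            for (SearchHit hit : resp.getHits()) {
                System.out.println(hit.getSourceAsString()); // write to file here instead
            }
        }
    }
}
```

Each scroll round trip picks up where the previous one left off, so the export touches every document exactly once regardless of index depth.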
In JavaScript, migrating an Elasticsearch database directly is not a common operation, since JavaScript is not normally used for low-level database migration. Migration is usually done server-side, for example with Elasticsearch's own snapshot and restore feature, or by moving the data through the API.

If you want to use JavaScript to assist an Elasticsearch migration, you can use the REST API via the official client to query the data, export it to a file, and then import that file into another Elasticsearch instance.

Here is a simple example showing how to export index data to a JSON file with Node.js and the Elasticsearch JavaScript client:

```javascript
const { Client } = require('@elastic/elasticsearch');
const fs = require('fs');

const client = new Client({ node: 'http://localhost:9200' });

async function exportIndexData(indexName, fileName) {
  // Open a scroll context and fetch the first batch.
  let response = await client.search({
    index: indexName,
    size: 1000,        // 1000 documents per batch; adjust to your data
    _source: '*',      // return all fields
    scroll: '2m'       // keep the scroll window open for 2 minutes
  });

  const out = fs.createWriteStream(fileName);
  while (response.body.hits.hits.length > 0) {
    // Write the current batch, one JSON document per line.
    for (const hit of response.body.hits.hits) {
      out.write(JSON.stringify(hit._source) + '\n');
    }
    // Fetch the next batch with the scroll id.
    response = await client.scroll({
      scroll_id: response.body._scroll_id,
      scroll: '2m'
    });
  }
  out.end();
}

// Example: export the index 'my_index' to 'data.json'.
exportIndexData('my_index', 'data.json').catch(console.log);
```

You can then use the Elasticsearch REST API or a client library to import the data from the JSON file into the new Elasticsearch cluster; a rough sketch of that step follows below.

Note that this is only a very basic example. A real migration may also need to consider data consistency, integrity, error handling, and performance, and in more complex scenarios you may need more elaborate logic to make sure the data migrates correctly.
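
For the import half, one option is the bulk API. The sketch below is in Java to match the rest of this post, and assumes the same 1.x TransportClient used above plus the newline-delimited export format from the previous example; the target index name "my_index_restored", the type "doc", and the batch size are all placeholder assumptions. Newer client versions expose an equivalent bulk request API.

```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;

public class BulkImport {
    // Re-indexes an export file (one JSON document per line) into a target index.
    public static void importFile(Client client, String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        BulkRequestBuilder bulk = client.prepareBulk();
        String line;
        while ((line = reader.readLine()) != null) {
            // "my_index_restored" and "doc" are placeholder index/type names.
            bulk.add(client.prepareIndex("my_index_restored", "doc").setSource(line));
            if (bulk.numberOfActions() >= 1000) { // flush every 1000 docs
                BulkResponse resp = bulk.execute().actionGet();
                if (resp.hasFailures()) {
                    System.out.println(resp.buildFailureMessage());
                }
                bulk = client.prepareBulk();
            }
        }
        if (bulk.numberOfActions() > 0) {
            bulk.execute().actionGet(); // flush the remainder
        }
        reader.close();
    }
}
```

Batching keeps each request a manageable size; checking `hasFailures()` after each flush surfaces per-document indexing errors instead of silently dropping them.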