Requirement: read a CSV file and bulk-load it into an Elasticsearch cluster protected by X-Pack authentication.
es: 7.2.0
java: 1.8
scala: 2.12.10
Here is the pom file. It is fairly messy and has no Scala packaging plugin, so take from it what you need.
<properties>
    <scala.version>2.12.10</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <elasticsearch.version>7.2.0</elasticsearch.version>
</properties>
<repositories>
    <repository>
        <id>apache-snapshots</id>
        <url>https://repository.apache.org/content/repositories/snapshots/</url>
    </repository>
    <!-- add the elasticsearch repo -->
    <repository>
        <id>elastic</id>
        <url>https://artifacts.elastic.co/maven</url>
        <releases>
            <enabled>true</enabled>
        </releases>
        <snapshots>
            <enabled>false</enabled>
        </snapshots>
    </repository>
</repositories>
<dependencies>
    <!-- Elasticsearch TransportClient -->
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>${elasticsearch.version}</version>
        <exclusions>
            <exclusion>
                <artifactId>jackson-core</artifactId>
                <groupId>com.fasterxml.jackson.core</groupId>
            </exclusion>
            <exclusion>
                <artifactId>log4j-api</artifactId>
                <groupId>org.apache.logging.log4j</groupId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>${elasticsearch.version}</version>
        <exclusions>
            <exclusion>
                <artifactId>commons-logging</artifactId>
                <groupId>commons-logging</groupId>
            </exclusion>
            <exclusion>
                <artifactId>httpclient</artifactId>
                <groupId>org.apache.httpcomponents</groupId>
            </exclusion>
            <exclusion>
                <artifactId>httpcore</artifactId>
                <groupId>org.apache.httpcomponents</groupId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>x-pack-transport</artifactId>
        <version>${elasticsearch.version}</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>${elasticsearch.version}</version>
    </dependency>
    <!-- logging -->
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.8</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.9.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.9.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>1.7.25</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.32</version>
    </dependency>
</dependencies>
main
Nothing much here, just a single method call. The parameters:
path: the tab-separated CSV file
ip: the Elasticsearch host
port: 9200
args[2]: the ES username
args[3]: the ES password
ReadCsv readCsv = new ReadCsv();
readCsv.readCsv(path, ip, port, args[2], args[3]);
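For completeness, a minimal sketch of the surrounding main method is below; the argument layout (args[0] = path, args[1] = host, port fixed at 9200) is my assumption based on the description above, not code from the original.
public class Main {
    public static void main(String[] args) {
        // Assumed layout: args[0] = CSV path, args[1] = ES host,
        // args[2] = ES username, args[3] = ES password; REST port fixed at 9200
        String path = args[0];
        String ip = args[1];
        Integer port = 9200;
        ReadCsv readCsv = new ReadCsv();
        readCsv.readCsv(path, ip, port, args[2], args[3]);
    }
}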
ReadCsv
Create the RestHighLevelClient here, attaching the X-Pack username and password through a CredentialsProvider. Creating the client and the BulkProcessor at this level avoids repeatedly creating and closing them.
InsertToEs is my own class that sends the data.
ESConfiguration is my own configuration class for the BulkProcessor.
The parameters:
path: path to the CSV file
ip: the Elasticsearch host
port: 9200
name: the ES username
password: the ES password
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.FileInputStream;
import java.net.UnknownHostException;
import java.util.Scanner;
import java.util.concurrent.TimeUnit;

public class ReadCsv {
    static InsertToEs insertToEs = new InsertToEs();
    static ESConfiguration esConfiguration = new ESConfiguration();
    public static BulkProcessor bulkProcessor;
    public static RestHighLevelClient esClient;

    public void readCsv(String path, String ip, Integer port, String name, String password) {
        // Initialize the ES client, supplying the X-Pack username/password
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(name, password));
        esClient = new RestHighLevelClient(
                RestClient.builder(new HttpHost(ip, port))
                        .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
                            public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
                                httpClientBuilder.disableAuthCaching();
                                return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                            }
                        })
        );
        try {
            bulkProcessor = esConfiguration.bulkProcessor(esClient);
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
        try (FileInputStream inputStream = new FileInputStream(path);
             Scanner sc = new Scanner(inputStream)) {
            while (sc.hasNextLine()) {
                String line = sc.nextLine();
                insertToEs.insert(line, bulkProcessor);
            }
            bulkProcessor.flush();
            // awaitClose flushes remaining requests and waits for in-flight bulks to finish
            bulkProcessor.awaitClose(5, TimeUnit.MINUTES);
            esClient.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
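Before streaming the whole file, it can be worth verifying that the credentials are actually accepted. A small optional sketch (not in the original code) using the client's ping, placed right after the client is built inside the existing try block; it needs org.elasticsearch.client.RequestOptions:
// Optional sanity check: ping returns true when the cluster answers HTTP 200;
// bad credentials typically surface as an ElasticsearchStatusException with status 401
if (!esClient.ping(RequestOptions.DEFAULT)) {
    throw new IllegalStateException("Elasticsearch did not respond at " + ip + ":" + port);
}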
InsertToEs
I won't paste all of this code, since it involves company code; only the parts relevant to this article are shown.
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.index.IndexRequest;
import java.util.HashMap;

public class InsertToEs {
    static String index = "index";
    static String type = "type";
    private static Logger logger = LogManager.getLogger(InsertToEs.class.getName());

    public void insert(String str, BulkProcessor bulkProcessor) throws Exception {
        // split the tab-separated row: column 0 is a string, column 1 an integer
        String[] csvRow = str.split("\t");
        HashMap<String, Object> json = new HashMap<>();
        json.put("file1", csvRow[0]);
        json.put("file2", Integer.valueOf(csvRow[1]));
        bulkProcessor.add(new IndexRequest(index, type).source(json));
    }
}
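Note that Integer.valueOf throws a NumberFormatException on non-numeric input, and since the while loop in readCsv sits inside one try block, a single bad line would abort the rest of the import. A more defensive variant of the same method could skip malformed rows instead; this is a sketch under the same two-column assumption, not the original company code:
// Sketch: skip malformed rows rather than letting one bad line abort the import
public void insertSafely(String str, BulkProcessor bulkProcessor) {
    String[] csvRow = str.split("\t");
    if (csvRow.length < 2) {
        logger.warn("Skipping row with too few columns: {}", str);
        return;
    }
    try {
        HashMap<String, Object> json = new HashMap<>();
        json.put("file1", csvRow[0]);
        json.put("file2", Integer.valueOf(csvRow[1]));
        bulkProcessor.add(new IndexRequest(index, type).source(json));
    } catch (NumberFormatException e) {
        logger.warn("Skipping row with non-numeric second column: {}", str);
    }
}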
ESConfiguration
This class is just configuration; what each setting means is commented in the code.
beforeBulk: runs before each bulk executes
first afterBulk: runs after a bulk completes normally
second afterBulk: runs when a bulk fails with an exception
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import java.net.UnknownHostException;

public class ESConfiguration {
    public BulkProcessor bulkProcessor(RestHighLevelClient esClient) throws UnknownHostException {
        BulkProcessor.Listener listener = new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long l, BulkRequest bulkRequest) {
            }

            @Override
            public void afterBulk(long l, BulkRequest bulkRequest, BulkResponse bulkResponse) {
            }

            @Override
            public void afterBulk(long l, BulkRequest bulkRequest, Throwable throwable) {
                System.out.println(bulkRequest.numberOfActions() + " actions failed in bulk, reason: " + throwable);
            }
        };
        BulkProcessor bulkProcessor = BulkProcessor.builder(
                (request, bulkListener) -> esClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
                listener)
                .setBulkActions(50000)                               // execute a bulk every 50,000 actions
                .setBulkSize(new ByteSizeValue(10, ByteSizeUnit.MB)) // or whenever 10 MB have accumulated
                .setFlushInterval(TimeValue.timeValueSeconds(300))   // flush at least every 5 minutes
                .setConcurrentRequests(10)                           // default is 1; number of concurrent bulks in flight (0 = fully synchronous)
                .setBackoffPolicy(BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3)) // retry policy for EsRejectedExecutionException when ES is under resource pressure; default is (50ms, 8)
                .build();
        return bulkProcessor;
    }
}
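One caveat worth knowing: the first afterBulk also fires when the bulk request itself succeeds but individual documents are rejected, so per-item errors never reach the Throwable overload. A sketch of a listener body that surfaces them, using the standard BulkResponse.hasFailures() and buildFailureMessage() client API:
@Override
public void afterBulk(long l, BulkRequest bulkRequest, BulkResponse bulkResponse) {
    if (bulkResponse.hasFailures()) {
        // buildFailureMessage concatenates the failure reason of every rejected item
        System.out.println("bulk completed with item failures: " + bulkResponse.buildFailureMessage());
    }
}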
In a quick test, loading 33 million rows into a single-node Elasticsearch took roughly 15 minutes, which is enough for my current needs. If you can think of anything worth optimizing, let me know.