Java API实现对Kudu操作

因需要迁移准生产环境的Kudu TS(Tablet Server)角色,需要先在测试环境中模拟一些数据来验证角色迁移的可行性,所以需要使用Java API对Kudu进行一些基础操作。当然你也可以使用Impala,但是对于上千万条的数据,使用Impala有点浪费资源。废话不多说,直接看代码示例吧。

e0fc920dbaca62601ed48362123c3f8b.png

maven依赖

4f8669d2729f1460cadc8b7e97d27b27.png

<!-- Maven dependencies for the Kudu Java API example below.
     All Kudu artifacts are pinned to the same version (1.9.0-cdh6.2.1). -->
<dependencies>
        <!-- Kudu Java client: provides the org.apache.kudu.* classes imported by the example class. -->
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-client</artifactId>
            <version>1.9.0-cdh6.2.1</version>
        </dependency>


        <!-- JUnit 4. NOTE(review): the example class shown does not reference JUnit and no scope is
             declared here; consider test scope if it is only used for tests. Confirm before changing. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>


        <!-- Kudu client tools. NOTE(review): not referenced by the example class shown; verify it is needed. -->
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-client-tools</artifactId>
            <version>1.9.0-cdh6.2.1</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.apache.kudu/kudu-spark2 -->
        <!-- Kudu/Spark integration (Scala 2.11 build). NOTE(review): not referenced by the example class shown. -->
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-spark2_2.11</artifactId>
            <version>1.9.0-cdh6.2.1</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
        <!-- Spark SQL (Scala 2.11 build). NOTE(review): the example imports org.apache.hadoop.* classes,
             but no Hadoop artifact is declared directly; presumably they arrive transitively (possibly
             through this dependency), so verify before removing it. Unlike the Kudu artifacts this
             version is not a CDH build; confirm it is compatible with the target cluster. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.1.0</version>
        </dependency>
    </dependencies>

73af713e491616fae292a52cee85138b.png

Java代码示例

0bc54bc1dedc42021706983ec5e9c00e.png

package com.wqg.base;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.*;


import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;


/**
 * Basic Kudu operations performed over a Kerberos-authenticated connection:
 * creating a table, inserting rows, scanning, upserting, deleting a row and dropping a table.
 *
 * <p>Environment-specific values keep their original defaults but can be overridden with
 * JVM system properties:
 * <ul>
 *   <li>{@code kudu.master} - comma-separated Kudu master addresses</li>
 *   <li>{@code java.security.krb5.conf} - path of the krb5.conf file</li>
 *   <li>{@code kudu.kerberos.principal} - principal used for the keytab login</li>
 *   <li>{@code kudu.kerberos.keytab} - path of the keytab file</li>
 * </ul>
 *
 * <p>All methods follow the same error-handling style: a {@link KuduException} is printed
 * and not rethrown, so callers must not assume an operation succeeded.
 */
public class KuduBaseOperation {
    /** Default location of krb5.conf when {@code java.security.krb5.conf} is not set. */
    private static final String DEFAULT_KRB5_CONF = "F:\\works\\config\\cm111\\krb5.conf";
    /** Default principal when {@code kudu.kerberos.principal} is not set. */
    private static final String DEFAULT_PRINCIPAL = "kudu/cm111@WMM.COM";
    /** Default keytab when {@code kudu.kerberos.keytab} is not set. */
    private static final String DEFAULT_KEYTAB = "F:\\works\\config\\cm111\\kudu.keytab";

    /** Kudu master addresses (overridable with {@code -Dkudu.master=...}). */
    private static final String KUDU_MASTER =
            System.getProperty("kudu.master", "cm111:7051,cm112:7051,cm113:7051");

    /** Number of buffered operations sent per flush by {@link #batchInsertTbale}. */
    private static final int BATCH_SIZE = 1000;

    /** Row id used by the parameterless update/delete demos. */
    private static final int DEMO_ROW_ID = 100;

    /** Client created by the most recent successful {@link #getKuduClient()} call. */
    private static KuduClient kuduClient;


    /**
     * Demo entry point: ensures table {@code kudu_tb_4} exists (creating it when missing,
     * otherwise deleting the demo row with id 100), inserts 2000 rows and closes the client.
     * The other operations ({@link #readTbale}, {@link #updateTbale}, {@link #batchInsertTbale},
     * {@link #dropTable}) can be called from here in the same way.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        KuduClient client = getKuduClient();
        if (client == null) {
            System.err.println("获取Kudu连接失败");
            return;
        }
        System.out.println(client);
        String tableName = "kudu_tb_4";
        // try-with-resources closes the client even when an operation throws.
        try (KuduClient closing = client) {
            if (!closing.tableExists(tableName)) {
                // Without this call the insert below fails because the table cannot be opened.
                createTable(tableName, closing);
            } else {
                System.out.println(tableName + "表已存在.");
                deleteTbaleData(tableName, closing);
                // Only a single row is deleted here, not the table.
                System.out.println("已尝试删除id=" + DEMO_ROW_ID + "的数据");
            }
            insertTable(tableName, closing);
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a table with columns {@code id} (INT32, primary key), {@code name} (STRING)
     * and {@code address} (STRING), hash-partitioned on {@code id} into 6 buckets.
     *
     * @param tableName  name of the table to create
     * @param kuduClient connected client
     */
    public static void createTable(String tableName, KuduClient kuduClient) {
        List<ColumnSchema> columns = new ArrayList<>();
        columns.add(new ColumnSchema.ColumnSchemaBuilder("id", Type.INT32).key(true).build());
        columns.add(new ColumnSchema.ColumnSchemaBuilder("name", Type.STRING).build());
        columns.add(new ColumnSchema.ColumnSchemaBuilder("address", Type.STRING).build());
        Schema schema = new Schema(columns);

        // Rows are spread over 6 hash buckets computed from the id column.
        List<String> hashColumns = new ArrayList<>();
        hashColumns.add("id");
        CreateTableOptions options = new CreateTableOptions();
        options.addHashPartitions(hashColumns, 6);
        try {
            kuduClient.createTable(tableName, schema, options);
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }


    /**
     * Inserts 2000 rows (ids 0..1999), one synchronous write per row.
     *
     * @param tableName  table to insert into
     * @param kuduClient connected client
     */
    public static void insertTable(String tableName, KuduClient kuduClient) {
        insertRows(tableName, kuduClient, 2000, false);
    }


    /**
     * Inserts 500000 rows (ids 0..499999). Operations are buffered and sent with a manual
     * flush every {@link #BATCH_SIZE} rows instead of one round trip per row.
     *
     * @param tableName  table to insert into
     * @param kuduClient connected client
     */
    public static void batchInsertTbale(String tableName, KuduClient kuduClient) {
        insertRows(tableName, kuduClient, 500000, true);
    }


    /**
     * Scans the whole table and prints the {@code id}, {@code name} and {@code address}
     * of every row to standard output.
     *
     * @param tableName  table to scan
     * @param kuduClient connected client
     */
    public static void readTbale(String tableName, KuduClient kuduClient) {
        KuduScanner scanner = null;
        try {
            KuduScanner.KuduScannerBuilder scannerBuilder =
                    kuduClient.newScannerBuilder(kuduClient.openTable(tableName));
            List<String> projection = new ArrayList<>();
            projection.add("id");
            projection.add("name");
            projection.add("address");
            scannerBuilder.setProjectedColumnNames(projection);
            scanner = scannerBuilder.build();

            // Rows arrive in batches; each nextRows() call returns one batch.
            while (scanner.hasMoreRows()) {
                RowResultIterator batch = scanner.nextRows();
                while (batch.hasNext()) {
                    RowResult row = batch.next();
                    int id = row.getInt("id");
                    String name = row.getString("name");
                    String address = row.getString("address");
                    System.out.println("id=" + id + "   name=" + name + "   address=" + address);
                }
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            if (scanner != null) {
                try {
                    scanner.close();
                } catch (KuduException e) {
                    e.printStackTrace();
                }
            }
        }
    }


    /**
     * Upserts the demo row: id 100 with fixed name and address values.
     *
     * @param tableName  table to modify
     * @param kuduClient connected client
     */
    public static void updateTbale(String tableName, KuduClient kuduClient) {
        updateTbale(tableName, kuduClient, DEMO_ROW_ID, "kudu_upsert", "某国某省_upsert");
    }


    /**
     * Upserts one row: the row is updated when {@code id} exists and inserted otherwise.
     *
     * @param tableName  table to modify
     * @param kuduClient connected client
     * @param id         primary key of the row
     * @param name       new value of the {@code name} column
     * @param address    new value of the {@code address} column
     */
    public static void updateTbale(String tableName, KuduClient kuduClient,
                                   int id, String name, String address) {
        KuduSession session = kuduClient.newSession();
        session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        try {
            KuduTable table = kuduClient.openTable(tableName);
            Upsert upsert = table.newUpsert();
            PartialRow row = upsert.getRow();
            row.addInt("id", id);
            row.addString("name", name);
            row.addString("address", address);
            tallyRowError(session.apply(upsert), 0);
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(session);
        }
    }


    /**
     * Deletes the demo row (id 100).
     *
     * @param tableName  table to delete from
     * @param kuduClient connected client
     */
    public static void deleteTbaleData(String tableName, KuduClient kuduClient) {
        deleteTbaleData(tableName, kuduClient, DEMO_ROW_ID);
    }


    /**
     * Deletes the row with the given primary key. A row error reported by the server
     * is printed to standard error.
     *
     * @param tableName  table to delete from
     * @param kuduClient connected client
     * @param id         primary key of the row to delete
     */
    public static void deleteTbaleData(String tableName, KuduClient kuduClient, int id) {
        KuduSession session = kuduClient.newSession();
        session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        try {
            KuduTable table = kuduClient.openTable(tableName);
            Delete delete = table.newDelete();
            delete.getRow().addInt("id", id);
            tallyRowError(session.apply(delete), 0);
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(session);
        }
    }


    /**
     * Drops the table if it exists. Static like the other operations so it can be called
     * from {@link #main}.
     *
     * @param tableName  table to drop
     * @param kuduClient connected client
     */
    public static void dropTable(String tableName, KuduClient kuduClient) {
        try {
            if (kuduClient.tableExists(tableName)) {
                kuduClient.deleteTable(tableName);
            }
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }


    /**
     * Performs the Kerberos login and builds a Kudu client as the logged-in user.
     *
     * @return the client held in the static field: the newly built client on success;
     *         on failure the exception is printed and the previous value is returned,
     *         which is {@code null} if no client was ever built
     */
    public static KuduClient getKuduClient() {
        KerberosAuth();
        final KuduClient.KuduClientBuilder builder = new KuduClient.KuduClientBuilder(KUDU_MASTER);
        builder.defaultSocketReadTimeoutMs(10000);
        try {
            // Build inside doAs so the client is created with the Kerberos login user's credentials.
            kuduClient = UserGroupInformation.getLoginUser().doAs(
                    new PrivilegedExceptionAction<KuduClient>() {
                        @Override
                        public KuduClient run() throws Exception {
                            return builder.build();
                        }
                    });
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        return kuduClient;
    }


    /**
     * Configures Kerberos system properties and logs in from the keytab.
     * A failed login is printed and not rethrown.
     */
    public static void KerberosAuth() {
        // Respect a krb5.conf already supplied with -Djava.security.krb5.conf.
        System.setProperty("java.security.krb5.conf",
                System.getProperty("java.security.krb5.conf", DEFAULT_KRB5_CONF));
        System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
        System.setProperty("sun.security.krb5.debug", "false");

        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);

        String principal = System.getProperty("kudu.kerberos.principal", DEFAULT_PRINCIPAL);
        String keytab = System.getProperty("kudu.kerberos.keytab", DEFAULT_KEYTAB);
        try {
            UserGroupInformation.loginUserFromKeytab(principal, keytab);
            System.out.println("Kerberos认证成功,当前用户=" + UserGroupInformation.getCurrentUser());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Inserts rows with ids {@code 0..rowCount-1} and generated name/address values.
     *
     * @param batched {@code true} to buffer operations and flush every {@link #BATCH_SIZE}
     *                rows; {@code false} to write each row synchronously
     */
    private static void insertRows(String tableName, KuduClient kuduClient,
                                   int rowCount, boolean batched) {
        KuduSession session = kuduClient.newSession();
        try {
            if (batched) {
                session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
                // Headroom above BATCH_SIZE so apply() never hits a full buffer before the flush.
                session.setMutationBufferSpace(BATCH_SIZE * 2);
            } else {
                session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
            }
            KuduTable table = kuduClient.openTable(tableName);
            int failedRows = 0;
            for (int i = 0; i < rowCount; i++) {
                Insert insert = table.newInsert();
                PartialRow row = insert.getRow();
                row.addInt("id", i);
                row.addString("name", "name-" + i);
                row.addString("address", "某省某市-" + i);
                // In synchronous mode apply() returns the per-row result; when buffering it returns null.
                failedRows = tallyRowError(session.apply(insert), failedRows);
                if (batched && (i + 1) % BATCH_SIZE == 0) {
                    failedRows = tallyRowErrors(session.flush(), failedRows);
                }
            }
            if (batched) {
                // Send whatever is left in the buffer after the last full batch.
                failedRows = tallyRowErrors(session.flush(), failedRows);
            }
            if (failedRows > 0) {
                System.err.println(tableName + "写入失败的行数=" + failedRows);
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(session);
        }
    }

    /** Adds one to {@code failedSoFar} if the response has a row error; prints only the first error. */
    private static int tallyRowError(OperationResponse response, int failedSoFar) {
        if (response == null || !response.hasRowError()) {
            return failedSoFar;
        }
        if (failedSoFar == 0) {
            System.err.println("Kudu行错误: " + response.getRowError());
        }
        return failedSoFar + 1;
    }

    /** Applies {@link #tallyRowError} to every response of a flush. */
    private static int tallyRowErrors(Iterable<OperationResponse> responses, int failedSoFar) {
        int failed = failedSoFar;
        for (OperationResponse response : responses) {
            failed = tallyRowError(response, failed);
        }
        return failed;
    }

    /** Closes the session, printing (not rethrowing) any failure. */
    private static void closeSession(KuduSession session) {
        try {
            session.close();
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }
}

本人旨在分享工作中一些用到的知识技能

如有感兴趣欢迎关注Wbigdata微信公众号

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值