因需要迁移准生产环境的 Kudu TS 角色,需要先在测试环境中模拟一些数据来验证角色迁移的可行性,所以需要使用 Java API 对 Kudu 进行一些基础操作。当然你也可以使用 Impala,但是上千万条数据用 Impala 写入有点浪费资源。废话不多说,直接看代码示例吧。
maven依赖
<!-- Maven dependencies declared for the Kudu Java sample that follows. -->
<dependencies>
<!-- Kudu Java client: provides the org.apache.kudu and org.apache.kudu.client classes the sample imports. -->
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client</artifactId>
<version>1.9.0-cdh6.2.1</version>
</dependency>
<!-- JUnit 4. The sample class shown in this article does not import it. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- Kudu client tools, same version as kudu-client. Not imported by the sample class shown in this article. -->
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client-tools</artifactId>
<version>1.9.0-cdh6.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kudu/kudu-spark2 -->
<!-- Kudu Spark integration built for Scala 2.11. Not imported by the sample class shown in this article. -->
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-spark2_2.11</artifactId>
<version>1.9.0-cdh6.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<!-- Spark SQL built for Scala 2.11. -->
<!-- NOTE(review): the sample imports org.apache.hadoop classes but no Hadoop artifact is declared here; -->
<!-- presumably they arrive transitively through one of the artifacts above - confirm. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.1.0</version>
</dependency>
</dependencies>
Java代码示例
package com.wqg.base;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.*;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
/**
 * Basic Kudu operations against a Kerberos-secured cluster: create a table, insert rows,
 * scan rows, upsert a row, delete a row and drop a table.
 *
 * <p>Every helper catches {@link KuduException} and prints the stack trace instead of
 * propagating it, so callers are never forced to handle Kudu failures. Per-row failures
 * that Kudu reports inside an {@link OperationResponse} (for example a duplicate primary
 * key) do not raise an exception; they are counted and summarised on {@code System.err}.
 */
public class KuduBaseOperation {
    // Client shared by the class; assigned by getKuduClient().
    private static KuduClient kuduClient;
    // Comma-separated list of Kudu master addresses.
    private static final String kuduMaster = "cm111:7051,cm112:7051,cm113:7051";

    /**
     * Sample driver: connects, makes sure the table exists (creating it when absent, or
     * deleting the row with id 100 when present), then inserts the sample rows.
     *
     * <p>{@link #readTbale}, {@link #updateTbale} and {@link #batchInsertTbale} can be
     * called from the same place to exercise the other operations.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        KuduClient client = getKuduClient();
        System.out.println(client);
        if (client == null) {
            // getKuduClient() has already printed the reason the connection failed.
            return;
        }
        String tableName = "kudu_tb_4";
        try {
            if (!client.tableExists(tableName)) {
                // Create the table when it is missing; otherwise the insert below
                // cannot open it.
                createTable(tableName, client);
            } else {
                System.out.println(tableName + "表已存在.");
                deleteTbaleData(tableName, client);
                // deleteTbaleData removes a single row, not the table, so the message
                // says "row data deleted" rather than "table dropped".
                System.out.println("删除数据成功");
            }
            insertTable(tableName, client);
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            // Always release the client, including when an operation above failed.
            try {
                client.close();
            } catch (KuduException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Creates a table with an INT32 key column {@code id} and two STRING columns
     * {@code name} and {@code address}, hash-partitioned on {@code id} into 6 buckets.
     *
     * @param tableName  name of the table to create
     * @param kuduClient connected client used to issue the request
     */
    public static void createTable(String tableName, KuduClient kuduClient) {
        ArrayList<ColumnSchema> columnSchemas = new ArrayList<>();
        columnSchemas.add(new ColumnSchema.ColumnSchemaBuilder("id", Type.INT32).key(true).build());
        columnSchemas.add(new ColumnSchema.ColumnSchemaBuilder("name", Type.STRING).build());
        columnSchemas.add(new ColumnSchema.ColumnSchemaBuilder("address", Type.STRING).build());
        Schema schema = new Schema(columnSchemas);

        // Table options: hash partitioning on the key column.
        CreateTableOptions options = new CreateTableOptions();
        ArrayList<String> partitionList = new ArrayList<>();
        partitionList.add("id");
        options.addHashPartitions(partitionList, 6);
        try {
            kuduClient.createTable(tableName, schema, options);
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts 2000 sample rows (ids 0..1999), one synchronous write per row.
     *
     * @param tableName  table to insert into
     * @param kuduClient connected client
     */
    public static void insertTable(String tableName, KuduClient kuduClient) {
        // Writes go through a session; AUTO_FLUSH_SYNC sends each operation immediately.
        KuduSession kuduSession = kuduClient.newSession();
        kuduSession.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        RowErrorTally tally = new RowErrorTally();
        try {
            KuduTable kuduTable = kuduClient.openTable(tableName);
            for (int i = 0; i < 2000; i++) {
                Insert insert = kuduTable.newInsert();
                PartialRow row = insert.getRow();
                row.addInt("id", i);
                row.addString("name", "name-" + i);
                row.addString("address", "某省某市-" + i);
                // A rejected row comes back in the response, not as an exception.
                tally.record(kuduSession.apply(insert));
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(kuduSession);
            tally.report("insert", tableName);
        }
    }

    /**
     * Inserts 500000 sample rows (ids 0..499999), buffering them client-side and
     * flushing in batches so that each row does not cost its own round trip.
     *
     * @param tableName  table to insert into
     * @param kuduClient connected client
     */
    public static void batchInsertTbale(String tableName, KuduClient kuduClient) {
        final int batchSize = 1000;
        KuduSession kuduSession = kuduClient.newSession();
        // MANUAL_FLUSH buffers operations until flush() is called explicitly.
        kuduSession.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
        // Leave headroom above the batch size: in MANUAL_FLUSH mode apply() fails once
        // the buffer is full.
        kuduSession.setMutationBufferSpace(batchSize * 2);
        RowErrorTally tally = new RowErrorTally();
        try {
            KuduTable kuduTable = kuduClient.openTable(tableName);
            for (int i = 0; i < 500000; i++) {
                Insert insert = kuduTable.newInsert();
                PartialRow row = insert.getRow();
                row.addInt("id", i);
                row.addString("name", "name-" + i);
                row.addString("address", "某省某市-" + i);
                kuduSession.apply(insert);
                if ((i + 1) % batchSize == 0) {
                    for (OperationResponse response : kuduSession.flush()) {
                        tally.record(response);
                    }
                }
            }
            // Send whatever is left over from the last partial batch.
            for (OperationResponse response : kuduSession.flush()) {
                tally.record(response);
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(kuduSession);
            tally.report("batch insert", tableName);
        }
    }

    /**
     * Scans the whole table and prints the {@code id}, {@code name} and {@code address}
     * of every row to standard output.
     *
     * @param tableName  table to scan
     * @param kuduClient connected client
     */
    public static void readTbale(String tableName, KuduClient kuduClient) {
        KuduScanner kuduScanner = null;
        try {
            KuduScanner.KuduScannerBuilder scannerBuilder =
                    kuduClient.newScannerBuilder(kuduClient.openTable(tableName));
            ArrayList<String> columnsList = new ArrayList<>();
            columnsList.add("id");
            columnsList.add("name");
            columnsList.add("address");
            scannerBuilder.setProjectedColumnNames(columnsList);
            kuduScanner = scannerBuilder.build();

            // The scanner hands rows back in batches; drain each batch in turn.
            while (kuduScanner.hasMoreRows()) {
                RowResultIterator rowResults = kuduScanner.nextRows();
                while (rowResults.hasNext()) {
                    RowResult row = rowResults.next();
                    int id = row.getInt("id");
                    String name = row.getString("name");
                    String address = row.getString("address");
                    System.out.println("id=" + id + " name=" + name + " address=" + address);
                }
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            // Close the scanner even when the scan was cut short by an exception.
            if (kuduScanner != null) {
                try {
                    kuduScanner.close();
                } catch (KuduException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Upserts the row with id 100: the row is updated when the id exists and inserted
     * when it does not.
     *
     * @param tableName  table to modify
     * @param kuduClient connected client
     */
    public static void updateTbale(String tableName, KuduClient kuduClient) {
        KuduSession kuduSession = kuduClient.newSession();
        kuduSession.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        RowErrorTally tally = new RowErrorTally();
        try {
            KuduTable kuduTable = kuduClient.openTable(tableName);
            // kuduTable.newUpdate() is the alternative when a missing id must not
            // create a new row.
            Upsert upsert = kuduTable.newUpsert();
            PartialRow row = upsert.getRow();
            row.addInt("id", 100);
            row.addString("name", "kudu_upsert");
            row.addString("address", "某国某省_upsert");
            tally.record(kuduSession.apply(upsert));
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(kuduSession);
            tally.report("upsert", tableName);
        }
    }

    /**
     * Deletes the row with id 100.
     *
     * @param tableName  table to delete from
     * @param kuduClient connected client
     */
    public static void deleteTbaleData(String tableName, KuduClient kuduClient) {
        KuduSession kuduSession = kuduClient.newSession();
        kuduSession.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        RowErrorTally tally = new RowErrorTally();
        try {
            KuduTable kuduTable = kuduClient.openTable(tableName);
            Delete delete = kuduTable.newDelete();
            PartialRow row = delete.getRow();
            row.addInt("id", 100);
            tally.record(kuduSession.apply(delete));
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            closeSession(kuduSession);
            tally.report("delete", tableName);
        }
    }

    /**
     * Drops the table if it exists; does nothing when it does not.
     *
     * <p>Unlike the other helpers this is an instance method; it is left that way so
     * existing callers keep working.
     *
     * @param tableName  table to drop
     * @param kuduClient connected client
     */
    public void dropTable(String tableName, KuduClient kuduClient) {
        try {
            if (kuduClient.tableExists(tableName)) {
                kuduClient.deleteTable(tableName);
            }
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }

    /**
     * Logs in through Kerberos and builds a Kudu client as the logged-in user.
     *
     * @return the client stored in the class-level field; {@code null} when no client
     *         has been built successfully
     */
    public static KuduClient getKuduClient() {
        KerberosAuth();
        final KuduClient.KuduClientBuilder kuduClientBuilder =
                new KuduClient.KuduClientBuilder(kuduMaster);
        kuduClientBuilder.defaultSocketReadTimeoutMs(10000);
        try {
            // Build the client inside doAs so it is created under the Kerberos login user.
            kuduClient = UserGroupInformation.getLoginUser().doAs(
                    new PrivilegedExceptionAction<KuduClient>() {
                        @Override
                        public KuduClient run() throws Exception {
                            return kuduClientBuilder.build();
                        }
                    });
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can see it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        return kuduClient;
    }

    /**
     * Configures the JVM and Hadoop security for Kerberos and logs in from a keytab.
     * The krb5.conf path, principal and keytab path are fixed values in this method.
     * A failed login is printed, not thrown.
     */
    public static void KerberosAuth() {
        System.setProperty("java.security.krb5.conf", "F:\\works\\config\\cm111\\krb5.conf");
        System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
        System.setProperty("sun.security.krb5.debug", "false");
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);
        try {
            UserGroupInformation.loginUserFromKeytab("kudu/cm111@WMM.COM", "F:\\works\\config\\cm111\\kudu.keytab");
            System.out.println("Kerberos认证成功,当前用户=" + UserGroupInformation.getCurrentUser());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes a session, printing (not throwing) any failure raised while closing. */
    private static void closeSession(KuduSession session) {
        try {
            session.close();
        } catch (KuduException e) {
            e.printStackTrace();
        }
    }

    /** Counts the per-row errors carried by operation responses and keeps the first one. */
    private static final class RowErrorTally {
        private int rejected;
        private RowError first;

        /** Records the response's row error, if it has one; a null response is ignored. */
        void record(OperationResponse response) {
            if (response != null && response.hasRowError()) {
                rejected++;
                if (first == null) {
                    first = response.getRowError();
                }
            }
        }

        /** Prints a one-line summary to System.err when at least one row was rejected. */
        void report(String action, String tableName) {
            if (rejected > 0) {
                System.err.println(action + " on " + tableName + ": " + rejected
                        + " row(s) rejected; first error: " + first);
            }
        }
    }
}
本人旨在分享工作中用到的一些知识技能。
如感兴趣,欢迎关注 Wbigdata 微信公众号。