Maven dependencies (pom.xml):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.6.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.gz</groupId>
<artifactId>spark-kudu-demo</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>spark-kudu-demo</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-spark2_2.11</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client-tools</artifactId>
<version>1.7.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>7</source>
<target>7</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Code:
package com.gz.sparkkududemo;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.*;
import org.apache.kudu.spark.kudu.KuduContext;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Examples of basic Apache Kudu operations (create / insert / update /
 * delete / scan) through the Java client, plus reading a Kudu table via
 * Spark SQL and checking table existence through {@code KuduContext}.
 *
 * @author gongzheng6
 * @since 2019/7/8 10:55
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = SparkKuduDemoApplication.class)
public class KuduOption {

    /** Kudu master address (host:port) used by every client in this class. */
    private static final String KUDU_MASTER = "nn02:7051";
    /** Name of the demo table all operations target. */
    private static String tableName = "KuduTest";

    /**
     * Creates the demo table: two STRING columns ("key" is the primary key),
     * a single replica, range-partitioned on "key".
     */
    @Test
    public void CreateTab() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            // Table schema; the primary-key column must come first.
            List<ColumnSchema> columns = new ArrayList<>(2);
            columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING).key(true).build());
            columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).build());
            Schema schema = new Schema(columns);

            // Table-creation options: replication factor and partitioning.
            CreateTableOptions options = new CreateTableOptions();
            List<String> rangeKeys = new ArrayList<>();
            rangeKeys.add("key");
            // One replica only — a dev/demo setting; production normally uses 3.
            options.setNumReplicas(1);
            // Range-partition on the "key" column.
            options.setRangePartitionColumns(rangeKeys);
            client.createTable(tableName, schema, options);
            // Hash partitioning on the key could be used instead:
            // options.addHashPartitions(parcols, 3);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Bulk-inserts 6099 rows using MANUAL_FLUSH mode.
     *
     * Bug fixed: the original called session.flush() BEFORE session.apply()
     * on every iteration, so each flush only pushed previously applied rows
     * and the final row was never flushed (and the session never closed).
     * Rows are now applied first, flushed in batches, with a final flush
     * and session close after the loop.
     */
    @Test
    public void InsertData() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            KuduTable table = client.openTable(tableName);
            // All Kudu writes must go through a session.
            KuduSession session = client.newSession();
            // Buffer mutations locally and flush explicitly.
            session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
            session.setMutationBufferSpace(3000);
            System.out.println("-------start--------" + System.currentTimeMillis());
            for (int i = 1; i < 6100; i++) {
                Insert insert = table.newInsert();
                PartialRow row = insert.getRow();
                row.addString("key", i + "");
                row.addString(1, "value" + i);
                session.apply(insert);
                // Flush in batches well below the 3000-op buffer limit.
                if (i % 1000 == 0) {
                    session.flush();
                }
            }
            // Push whatever is still sitting in the mutation buffer.
            session.flush();
            session.close();
            System.out.println("-------end--------" + System.currentTimeMillis());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Updates the "value" column of the row whose key is "998".
     *
     * Bug fixed: the original flushed before applying, which is a no-op;
     * the session now applies first, then flushes, and is closed.
     */
    @Test
    public void kuduUpdateTest() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            KuduTable table = client.openTable(tableName);
            KuduSession session = client.newSession();
            Update update = table.newUpdate();
            PartialRow row = update.getRow();
            row.addString("key", 998 + "");
            row.addString("value", "updata Data " + 10);
            session.apply(update);
            session.flush();
            session.close();
            // System.out.print(operationResponse.getRowError());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Deletes the row whose primary key is "992". The session's default
     * AUTO_FLUSH_SYNC mode sends the delete as part of apply().
     *
     * Fixed: the client was never shut down (connection leak).
     */
    @Test
    public void deleteData() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            KuduTable table = client.openTable(tableName);
            KuduSession session = client.newSession();
            Delete delete = table.newDelete();
            PartialRow row = delete.getRow();
            row.addString("key", "992");
            session.apply(delete);
            session.close();
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Full scan of the table, projecting only the "value" column, printing
     * every row. Fixed: the scanner is now closed after the scan.
     */
    @Test
    public void SearchData() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            KuduTable table = client.openTable(tableName);
            List<String> projectColumns = new ArrayList<>(1);
            projectColumns.add("value");
            KuduScanner scanner = client.newScannerBuilder(table)
                    .setProjectedColumnNames(projectColumns)
                    .build();
            while (scanner.hasMoreRows()) {
                RowResultIterator results = scanner.nextRows();
                while (results.hasNext()) {
                    RowResult result = results.next();
                    // Index 0 is "value" — the only projected column.
                    System.out.println(result.getString(0));
                }
            }
            scanner.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Scans and prints the rows matching the predicate key == "991".
     *
     * Fixed: the client was never shut down and the scanner never closed.
     */
    @Test
    public void searchDataByCondition() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            KuduTable table = client.openTable(tableName);
            KuduScanner.KuduScannerBuilder scannerBuilder = client.newScannerBuilder(table);
            // Predicate: key == "991" (column, comparison op, value).
            KuduPredicate predicate = KuduPredicate.newComparisonPredicate(
                    table.getSchema().getColumn("key"),
                    KuduPredicate.ComparisonOp.EQUAL,
                    "991");
            scannerBuilder.addPredicate(predicate);
            KuduScanner scanner = scannerBuilder.build();
            while (scanner.hasMoreRows()) {
                RowResultIterator iterator = scanner.nextRows();
                while (iterator.hasNext()) {
                    RowResult result = iterator.next();
                    System.out.println("输出: " + result.getString(0) + "--" + result.getString("value"));
                }
            }
            scanner.close();
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Drops the demo table. */
    @Test
    public void DelTab() {
        KuduClient client = new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
        try {
            client.deleteTable(tableName);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                client.shutdown();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Reads the Kudu table through the Spark-Kudu datasource and runs a
     * Spark SQL query over it.
     *
     * Fixed: uses the KUDU_MASTER / tableName constants instead of
     * duplicated string literals, the non-deprecated
     * createOrReplaceTempView instead of registerTempTable, and a typed
     * Dataset&lt;Row&gt; instead of the raw type.
     */
    @Test
    public void searchBysparkSql() {
        SparkSession sparkSession = getSparkSession();
        List<StructField> fields = Arrays.asList(
                DataTypes.createStructField("key", DataTypes.StringType, true),
                DataTypes.createStructField("value", DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);
        Dataset<Row> ds = sparkSession.read().format("org.apache.kudu.spark.kudu")
                .schema(schema)
                .option("kudu.master", KUDU_MASTER)
                .option("kudu.table", tableName)
                .load();
        ds.createOrReplaceTempView("abc");
        sparkSession.sql("select * from abc").show();
    }

    /**
     * Checks table existence via KuduContext.
     *
     * Fixed: uses KUDU_MASTER instead of a hard-coded IP that differed
     * from the master address every other method in this class talks to.
     */
    @Test
    public void checkTableExistByKuduContext() {
        SparkSession sparkSession = getSparkSession();
        KuduContext context = new KuduContext(KUDU_MASTER, sparkSession.sparkContext());
        System.out.println(tableName + " is exist = " + context.tableExists(tableName));
    }

    /**
     * Builds the local SparkSession used by the Spark-based tests.
     *
     * @return a SparkSession backed by a local[*] SparkContext
     */
    public SparkSession getSparkSession() {
        SparkConf conf = new SparkConf().setAppName("test")
                .setMaster("local[*]")
                .set("spark.driver.userClassPathFirst", "true");
        conf.set("spark.sql.crossJoin.enabled", "true");
        SparkContext sparkContext = new SparkContext(conf);
        return SparkSession.builder().sparkContext(sparkContext).getOrCreate();
    }
}