Spark 读写 Kudu 2

Maven 依赖

<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor for the spark-kudu-demo project: Spring Boot starter + Kudu client + Spark 2.x (Scala 2.11). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.6.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.gz</groupId>
    <artifactId>spark-kudu-demo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>spark-kudu-demo</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <!-- Single source of truth for the library versions used by several artifacts below. -->
        <kudu.version>1.7.0</kudu.version>
        <spark.version>2.3.0</spark.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-spark2_2.11</artifactId>
            <version>${kudu.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-client</artifactId>
            <version>${kudu.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-client-tools</artifactId>
            <version>${kudu.version}</version>
        </dependency>


    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Compile at the level declared in java.version; a hard-coded 7 here
                         contradicted the 1.8 property above. -->
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

代码:

package com.gz.sparkkududemo;

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.*;
import org.apache.kudu.spark.kudu.KuduContext;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Demo tests showing basic Kudu operations through the Kudu Java client
 * (create/drop table, insert, update, delete, scan) and through Spark
 * (reading a Kudu table with Spark SQL, checking table existence via {@link KuduContext}).
 *
 * <p>Every test talks to the Kudu master at {@link #KUDU_MASTER} and works on the
 * table {@link #TABLE_NAME}. Failures are printed with {@code printStackTrace()}
 * rather than rethrown, so a test does not fail when the Kudu call fails.
 *
 * @author gongzheng6 (2019/7/8)
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = SparkKuduDemoApplication.class)
public class KuduOption {
    /** Address (host:port) of the Kudu master. */
    private static final String KUDU_MASTER = "nn02:7051";

    /** Name of the Kudu table all tests operate on. */
    private static final String TABLE_NAME = "KuduTest";

    /** Mutation buffer size configured on the manually flushed insert session. */
    private static final int MUTATION_BUFFER_SPACE = 3000;

    /** Number of buffered inserts sent per flush; kept below {@link #MUTATION_BUFFER_SPACE}. */
    private static final int FLUSH_BATCH_SIZE = 1000;

    /** Builds a synchronous Kudu client connected to {@link #KUDU_MASTER}. */
    private static KuduClient newClient() {
        return new KuduClient.KuduClientBuilder(KUDU_MASTER).build();
    }

    /** Shuts the client down, printing (not propagating) any failure. */
    private static void shutdownClient(KuduClient client) {
        try {
            client.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Creates the table with two STRING columns, {@code key} (primary key) and {@code value}. */
    @Test
    public void CreateTab() {
        KuduClient client = newClient();

        try {
            // Table schema: "key" is the primary key, "value" is a plain column.
            List<ColumnSchema> columns = new ArrayList<>(2);
            columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING).key(true).build());
            columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).build());
            Schema schema = new Schema(columns);

            // Range-partition on "key", with a single replica.
            List<String> rangeKeys = new ArrayList<>();
            rangeKeys.add("key");

            CreateTableOptions options = new CreateTableOptions();
            options.setNumReplicas(1);
            options.setRangePartitionColumns(rangeKeys);

            client.createTable(TABLE_NAME, schema, options);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            shutdownClient(client);
        }
    }

    /**
     * Inserts rows with keys "1" .. "6099" and values "value1" .. "value6099".
     *
     * <p>The session is in MANUAL_FLUSH mode: operations are applied first and then
     * flushed in batches of {@link #FLUSH_BATCH_SIZE}. Closing the session flushes
     * whatever is still buffered after the loop.
     */
    @Test
    public void InsertData() {
        KuduClient client = newClient();
        try {
            KuduTable table = client.openTable(TABLE_NAME);
            // Kudu writes always go through a session.
            KuduSession session = client.newSession();
            session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
            session.setMutationBufferSpace(MUTATION_BUFFER_SPACE);

            System.out.println("-------start--------" + System.currentTimeMillis());
            try {
                for (int i = 1; i < 6100; i++) {
                    Insert insert = table.newInsert();
                    PartialRow row = insert.getRow();
                    row.addString("key", i + "");
                    row.addString(1, "value" + i);

                    // Buffer the operation first, then flush once a full batch has accumulated.
                    session.apply(insert);
                    if (i % FLUSH_BATCH_SIZE == 0) {
                        session.flush();
                    }
                }
            } finally {
                // close() sends any operations still buffered and releases the session.
                session.close();
            }
            System.out.println("-------end--------" + System.currentTimeMillis());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            shutdownClient(client);
        }
    }

    /** Updates the {@code value} column of the row whose key is "998". */
    @Test
    public void kuduUpdateTest() {
        KuduClient client = newClient();
        try {
            KuduTable table = client.openTable(TABLE_NAME);
            KuduSession session = client.newSession();
            try {
                Update update = table.newUpdate();
                PartialRow row = update.getRow();
                row.addString("key", 998 + "");
                row.addString("value", "updata Data " + 10);

                // The session keeps its default flush mode, so no explicit flush is issued here.
                session.apply(update);
            } finally {
                session.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            shutdownClient(client);
        }
    }

    /** Deletes the row whose primary key is "992". */
    @Test
    public void deleteData() {
        KuduClient client = newClient();
        try {
            KuduTable table = client.openTable(TABLE_NAME);
            KuduSession session = client.newSession();
            try {
                Delete delete = table.newDelete();
                PartialRow row = delete.getRow();
                row.addString("key", "992");

                session.apply(delete);
            } finally {
                session.close();
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            // Release the connection like every other test does.
            shutdownClient(client);
        }
    }

    /** Scans the whole table, projecting only the {@code value} column, and prints each value. */
    @Test
    public void SearchData() {
        KuduClient client = newClient();

        try {
            KuduTable table = client.openTable(TABLE_NAME);

            List<String> projectColumns = new ArrayList<>(1);
            projectColumns.add("value");
            KuduScanner scanner = client.newScannerBuilder(table)
                    .setProjectedColumnNames(projectColumns)
                    .build();

            while (scanner.hasMoreRows()) {
                RowResultIterator batch = scanner.nextRows();
                while (batch.hasNext()) {
                    // Index 0 is "value": it is the only projected column.
                    System.out.println(batch.next().getString(0));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            shutdownClient(client);
        }
    }

    /** Scans the table for rows whose {@code key} equals "991" and prints key and value. */
    @Test
    public void searchDataByCondition() {
        KuduClient client = newClient();

        try {
            KuduTable table = client.openTable(TABLE_NAME);

            // Predicate: key == "991".
            KuduPredicate predicate = KuduPredicate.newComparisonPredicate(
                    table.getSchema().getColumn("key"),
                    KuduPredicate.ComparisonOp.EQUAL,
                    "991");

            KuduScanner scanner = client.newScannerBuilder(table)
                    .addPredicate(predicate)
                    .build();

            while (scanner.hasMoreRows()) {
                RowResultIterator batch = scanner.nextRows();
                while (batch.hasNext()) {
                    RowResult result = batch.next();
                    System.out.println("输出: " + result.getString(0) + "--" + result.getString("value"));
                }
            }
        } catch (KuduException e) {
            e.printStackTrace();
        } finally {
            // Release the connection like every other test does.
            shutdownClient(client);
        }
    }

    /** Drops the table. */
    @Test
    public void DelTab() {
        KuduClient client = newClient();
        try {
            client.deleteTable(TABLE_NAME);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            shutdownClient(client);
        }
    }

    /**
     * Loads the Kudu table through the kudu-spark data source, registers it as the
     * temporary view {@code abc} and shows the result of {@code select * from abc}.
     */
    @Test
    public void searchBysparkSql() {
        SparkSession sparkSession = getSparkSession();
        try {
            List<StructField> fields = Arrays.asList(
                    DataTypes.createStructField("key", DataTypes.StringType, true),
                    DataTypes.createStructField("value", DataTypes.StringType, true));
            StructType schema = DataTypes.createStructType(fields);

            sparkSession.read()
                    .format("org.apache.kudu.spark.kudu")
                    .schema(schema)
                    .option("kudu.master", KUDU_MASTER)
                    .option("kudu.table", TABLE_NAME)
                    .load()
                    // Non-deprecated replacement for registerTempTable.
                    .createOrReplaceTempView("abc");

            sparkSession.sql("select * from abc").show();
        } finally {
            // Stop the local Spark context so it is not left running in this JVM.
            sparkSession.stop();
        }
    }

    /** Prints whether the table exists, using {@link KuduContext}. */
    @Test
    public void checkTableExistByKuduContext() {
        SparkSession sparkSession = getSparkSession();
        try {
            // Same master as the other tests, so the check targets the cluster the table was created on.
            KuduContext context = new KuduContext(KUDU_MASTER, sparkSession.sparkContext());
            System.out.println(TABLE_NAME + " is exist = " + context.tableExists(TABLE_NAME));
        } finally {
            sparkSession.stop();
        }
    }

    /**
     * Returns a local ({@code local[*]}) Spark session named "test".
     *
     * <p>Uses {@code getOrCreate()} with the configuration, so an already running
     * session/context in this JVM is reused instead of constructing a second
     * {@code SparkContext}.
     *
     * @return the active or newly created Spark session
     */
    public SparkSession getSparkSession() {
        SparkConf conf = new SparkConf()
                .setAppName("test")
                .setMaster("local[*]")
                .set("spark.driver.userClassPathFirst", "true")
                .set("spark.sql.crossJoin.enabled", "true");

        return SparkSession.builder().config(conf).getOrCreate();
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值