Reading and Writing HBase with the Flink Table API

1. Obtain the table environment

Batch processing is used as the example:

        // Batch execution environment
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
//        StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

        // Table environment (Blink planner, batch mode)
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inBatchMode()
                .build();
        TableEnvironment tableEnv = TableEnvironment.create(settings);
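
The example above uses batch mode. If the same HBase tables need to be read and written from a streaming job instead, a streaming table environment can be created in much the same way. A minimal sketch (assuming the flink-table-api-java-bridge module is on the classpath, which provides org.apache.flink.table.api.bridge.java.StreamTableEnvironment):

        // Streaming variant: bind the table environment to a StreamExecutionEnvironment
        StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings streamSettings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv, streamSettings);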

2. Create the HBase table connections

        // Detail (DWD) table
        TableResult tableResult = tableEnv.executeSql(
                "CREATE TABLE DWD_YDJT_SCALEANDINC_DATE (" +
                        " rowkey STRING," +
                        " info ROW<businessDataId STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE, channelSource STRING>," +
                        " PRIMARY KEY (rowkey) NOT ENFORCED" +
                        " ) WITH (" +
                        " 'connector' = 'hbase-2.2' ," +
                        " 'table-name' = 'DWD_YDJT_SCALEANDINC_DATE' ," +
                        " 'zookeeper.quorum' = 'linux121:2181,linux122:2181,linux123:2181'" +
                        " )");
        // Aggregate (DWS) table
        TableResult dwsTableResult = tableEnv.executeSql(
                "CREATE TABLE DWS_YDJT_SCALEANDINC_DATE (" +
                        " rowkey STRING," +
                        " info ROW< channelSource STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE>," +
                        " PRIMARY KEY (rowkey) NOT ENFORCED" +
                        " ) WITH (" +
                        " 'connector' = 'hbase-2.2' ," +
                        " 'table-name' = 'DWS_YDJT_SCALEANDINC_DATE' ," +
                        " 'zookeeper.quorum' = 'linux121:2181'" +
                        " )");

Note: 'connector' = 'hbase-2.2' does not pin an exact HBase version; the supported versions are listed in the official documentation:
https://ci.apache.org/projects/flink/flink-docs-release-1.13/zh/docs/connectors/table/overview/
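
The WITH clause can also carry the connector's documented tuning options. A sketch (not part of the original setup; the _BUFFERED table name and the option values are only illustrative) that batches writes on the sink side with the 'sink.buffer-flush.*' options and sets the ZooKeeper root node explicitly:

        // Same HBase sink table, with write buffering enabled
        tableEnv.executeSql(
                "CREATE TABLE DWS_YDJT_SCALEANDINC_DATE_BUFFERED (" +
                        " rowkey STRING," +
                        " info ROW<channelSource STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE>," +
                        " PRIMARY KEY (rowkey) NOT ENFORCED" +
                        " ) WITH (" +
                        " 'connector' = 'hbase-2.2'," +
                        " 'table-name' = 'DWS_YDJT_SCALEANDINC_DATE'," +
                        " 'zookeeper.quorum' = 'linux121:2181,linux122:2181,linux123:2181'," +
                        " 'zookeeper.znode.parent' = '/hbase'," +
                        " 'sink.buffer-flush.max-rows' = '1000'," +
                        " 'sink.buffer-flush.interval' = '2s'" +
                        " )");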

3. Read from HBase, compute, and write back

3.1 Read the HBase detail table and aggregate with Flink SQL

Read the data and register it as a temporary view dwsTable. Note that the physical columns live inside the info row, so they are referenced as info.<column> in the query:

        Table table = tableEnv.sqlQuery(
                "select TO_BASE64(info.channelSource || info.businessDate), " +
                " info.channelSource as channelSource, info.businessDate as businessDate, " +
                " sum(info.businessIncome) as businessIncome, sum(info.businessScale) as businessScale " +
                " FROM DWD_YDJT_SCALEANDINC_DATE " +
                " group by info.channelSource, info.businessDate");
        tableEnv.createTemporaryView("dwsTable", table);
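
To check the aggregation before wiring up the write in 3.2, the intermediate table can be printed directly. An optional sketch (each execute() call submits its own batch job):

        // Optional sanity check: print the aggregated rows
        table.execute().print();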

3.2 Write to the HBase aggregate table

        //        INSERT INTO hTable
//        SELECT rowkey, ROW(f1q1), ROW(f2q2, f2q3), ROW(f3q4, f3q5, f3q6) FROM T;
        TableResult executeResult = tableEnv.executeSql("insert into DWS_YDJT_SCALEANDINC_DATE " +
        "select TO_BASE64(channelSource||businessDate) ,ROW(channelSource,businessDate,businessIncome, businessScale) FROM dwsTable ");

Reference (official documentation): https://ci.apache.org/projects/flink/flink-docs-release-1.13/zh/docs/connectors/table/hbase/#%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8-hbase-%E8%A1%A8

4. pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flinksqlConnHbase</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <flink.version>1.12.1</flink.version>
        <scala.binary.version>2.12</scala.binary.version>
        <hive.version>3.1.2</hive.version>
        <mysql.version>8.0.19</mysql.version>
        <hbase.version>2.4.0</hbase.version>
    </properties>


    <dependencies>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.73</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.9.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.9.2</version>
        </dependency>
        <!--https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.9.2</version>
        </dependency>

        <!-- Flink -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- HBase -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hbase-2.2_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <!-- Table API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- csv -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Lombok (optional)
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.18</version>
        </dependency>
        -->

    </dependencies>
</project>

5. Demo code

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

public class HBaseTest {
    public static void main(String[] args) throws Exception {
        // Batch execution environment (not used by the Table API below, kept for reference)
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
//        StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

        // Table environment (Blink planner, batch mode)
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inBatchMode()
                .build();
        TableEnvironment tableEnv = TableEnvironment.create(settings);

        // Detail (DWD) table
        TableResult tableResult = tableEnv.executeSql(
                "CREATE TABLE DWD_YDJT_SCALEANDINC_DATE (" +
                        " rowkey STRING," +
                        " info ROW<businessDataId STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE, channelSource STRING>," +
                        " PRIMARY KEY (rowkey) NOT ENFORCED" +
                        " ) WITH (" +
                        " 'connector' = 'hbase-2.2' ," +
                        " 'table-name' = 'DWD_YDJT_SCALEANDINC_DATE' ," +
                        " 'zookeeper.quorum' = 'linux121:2181,linux122:2181,linux123:2181'" +
                        " )");
        // Aggregate (DWS) table
        TableResult dwsTableResult = tableEnv.executeSql(
                "CREATE TABLE DWS_YDJT_SCALEANDINC_DATE (" +
                        " rowkey STRING," +
                        " info ROW< channelSource STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE>," +
                        " PRIMARY KEY (rowkey) NOT ENFORCED" +
                        " ) WITH (" +
                        " 'connector' = 'hbase-2.2' ," +
                        " 'table-name' = 'DWS_YDJT_SCALEANDINC_DATE' ," +
                        " 'zookeeper.quorum' = 'linux121:2181'" +
                        " )");

        // Sanity check: confirm the data in HBase can be read at all
//        Table table = tableEnv.sqlQuery("SELECT rowkey, info FROM DWD_YDJT_SCALEANDINC_DATE");

        // Aggregate per channelSource/businessDate; the physical columns live inside the info row
        Table table = tableEnv.sqlQuery(
                "select TO_BASE64(info.channelSource || info.businessDate), " +
                " info.channelSource as channelSource, info.businessDate as businessDate, " +
                " sum(info.businessIncome) as businessIncome, sum(info.businessScale) as businessScale " +
                " FROM DWD_YDJT_SCALEANDINC_DATE " +
                " group by info.channelSource, info.businessDate");
        tableEnv.createTemporaryView("dwsTable", table);

        //        INSERT INTO hTable
//        SELECT rowkey, ROW(f1q1), ROW(f2q2, f2q3), ROW(f3q4, f3q5, f3q6) FROM T;
        TableResult executeResult = tableEnv.executeSql("insert into DWS_YDJT_SCALEANDINC_DATE " +
        "select TO_BASE64(channelSource||businessDate) ,ROW(channelSource,businessDate,businessIncome, businessScale) FROM dwsTable ");

        // Print the result of the INSERT statement (job status / affected row count)
        executeResult.print();

        // The TableResult of an INSERT does not carry the aggregated rows themselves.
        // To inspect them, execute the query and collect its result. Note that print()
        // and collect() each consume a result, so use only one of them per TableResult.
        TableResult queryResult = table.execute();
        CloseableIterator<Row> collect = queryResult.collect();

        List<DwdYdjtScaleAndIncDate> dwdList = new ArrayList<>();

        // Parse each row of the query result into a user-defined POJO.
        // DwdYdjtScaleAndIncDate is assumed to take the five result columns as Strings.
        collect.forEachRemaining(new Consumer<Row>() {
            @Override
            public void accept(Row row) {
                System.out.println(".......Consumer.........");
                String rowkey = String.valueOf(row.getField(0));
                String channelSource = String.valueOf(row.getField(1));
                String businessDate = String.valueOf(row.getField(2));
                String businessIncome = String.valueOf(row.getField(3));
                String businessScale = String.valueOf(row.getField(4));
                dwdList.add(new DwdYdjtScaleAndIncDate(rowkey, channelSource, businessDate, businessIncome, businessScale));
            }
        });


        System.out.println("................");

        for(DwdYdjtScaleAndIncDate um : dwdList){
            System.out.println(um);
        }

        // No env.execute() is needed here: executeSql()/execute() submit their jobs eagerly.
    }
}
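
The demo references a DwdYdjtScaleAndIncDate POJO that is not shown in the post. A minimal sketch of what it is assumed to look like (field names follow the result columns; the real class may differ):

public class DwdYdjtScaleAndIncDate {
    private final String rowkey;
    private final String channelSource;
    private final String businessDate;
    private final String businessIncome;
    private final String businessScale;

    public DwdYdjtScaleAndIncDate(String rowkey, String channelSource, String businessDate,
                                  String businessIncome, String businessScale) {
        this.rowkey = rowkey;
        this.channelSource = channelSource;
        this.businessDate = businessDate;
        this.businessIncome = businessIncome;
        this.businessScale = businessScale;
    }

    @Override
    public String toString() {
        return "DwdYdjtScaleAndIncDate{" +
                "rowkey='" + rowkey + "', channelSource='" + channelSource +
                "', businessDate='" + businessDate + "', businessIncome='" + businessIncome +
                "', businessScale='" + businessScale + "'}";
    }
}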


