Flink SQL 1.13: reading from Kafka and writing to HBase in real time

Approach

1. Create the target table (hbase-table) in HBase (a shell sketch follows this list).
2. In the job, register an HBase sink table (HbaseSinkTable) mapped to hbase-table.
3. In the job, register a Kafka source table (KafkaSourceTable).
4. Execute an INSERT ... SELECT statement to move data from the source to the sink.

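For step 1, a minimal sketch of the table creation in the HBase shell, assuming the namespace gdc_dev_test and the single column family cf1 that the sink DDL below expects:

# hbase shell
create_namespace 'gdc_dev_test'
create 'gdc_dev_test:mytable', 'cf1'
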
Reference

Flink SQL: writing Kafka data to Hive in real time

Versions

Flink 1.13.1
HBase 2.1.0-cdh6.1.1

POM file

The POM still carries some dependencies this job does not need (the Hive, Parquet, ORC, JDBC and MySQL entries are likely leftovers from the companion Hive article); prune them as you see fit. Normally only one of the two Kafka connector artifacts is needed as well, since flink-sql-connector-kafka is a shaded jar that already bundles the plain connector.

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <flink.version>1.13.1</flink.version>
    <scala.version>2.11</scala.version>
    <hive.version>2.1.1-cdh6.1.1</hive.version>
    <hadoop.version>3.0.0-cdh6.1.1</hadoop.version>
    <log4j.version>2.8.2</log4j.version>
    <fastjson.version>1.2.7</fastjson.version>
  </properties>
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-compress</artifactId>
      <version>1.19</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>${fastjson.version}</version>
    </dependency>

    <dependency>
      <groupId>commons-cli</groupId>
      <artifactId>commons-cli</artifactId>
      <version>1.4</version>
    </dependency>
    <!-- Apache Flink dependencies. Scopes are left at compile so the job can be
         run from the IDE; switch them to provided when building for a cluster. -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-scala_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>


    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-clients_${scala.version}</artifactId>
      <version>${flink.version}</version>
      <exclusions>
        <exclusion>
          <artifactId>commons-compress</artifactId>
          <groupId>org.apache.commons</groupId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- Flink SQL dependencies -->

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-runtime-blink_${scala.version}</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>




    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-common</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <!-- Add connector dependencies here. They must be in the default scope (compile). -->


    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-sql-connector-kafka_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-hbase-2.2_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>


    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-json</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-hive_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>${hive.version}</version>
      <exclusions>
        <exclusion>
          <artifactId>calcite-avatica</artifactId>
          <groupId>org.apache.calcite</groupId>
        </exclusion>
        <exclusion>
          <artifactId>calcite-core</artifactId>
          <groupId>org.apache.calcite</groupId>
        </exclusion>
        <exclusion>
          <artifactId>calcite-linq4j</artifactId>
          <groupId>org.apache.calcite</groupId>
        </exclusion>
        <exclusion>
          <artifactId>commons-compress</artifactId>
          <groupId>org.apache.commons</groupId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <artifactId>commons-compress</artifactId>
          <groupId>org.apache.commons</groupId>
        </exclusion>
      </exclusions>
    </dependency>


    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-parquet_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-orc_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-kafka_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-base</artifactId>
      <version>${flink.version}</version>
    </dependency>


    <!-- Logging framework, to produce console output when running in the IDE. -->
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-slf4j-impl</artifactId>
      <version>${log4j.version}</version>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-api</artifactId>
      <version>${log4j.version}</version>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>${log4j.version}</version>
      <scope>runtime</scope>
    </dependency>

    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.46</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-jdbc_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
  </dependencies>

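If the target cluster does not already ship the HBase classes, the shaded SQL connector can be used in place of flink-connector-hbase-2.2; a sketch, reusing the version properties above:

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-sql-connector-hbase-2.2_${scala.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>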

Scala demo

import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api._
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.slf4j.LoggerFactory

/**
 * @Author: jhon_yh
 * @Description: Flink SQL 1.13 demo: read JSON events from Kafka and write them to HBase.
 */

// HBase version: 2.1.0-cdh6.1.1, addressed through the hbase-2.2 connector
object FlinkKafkaHbase {

  private val log = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    // Streaming environment with exactly-once checkpoints every 60 s, stored on HDFS
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(1000L * 60, CheckpointingMode.EXACTLY_ONCE)
    env.getCheckpointConfig.setCheckpointStorage("hdfs://ns/test/yh/checkpoint/hbase/")

    // Table environment on the Blink planner
    val settings = EnvironmentSettings.newInstance().useBlinkPlanner().build()
    val tEnv = StreamTableEnvironment.create(env, settings)


    // Register the Kafka source table (drop any stale definition first)
    tEnv.getConfig.setSqlDialect(SqlDialect.DEFAULT)
    tEnv.executeSql("drop table if exists KafkaSourceTable")

    // Alternative derivations of ts, kept for reference:
    //   ts as to_timestamp(from_unixtime(tid/1000, 'yy-MM-dd HH:mm:ss'))
    //   user_action_time AS PROCTIME()  -- declares an extra processing-time attribute column
    // ts is a computed column: '20' + the first six chars of tid (a yyMMdd prefix) => yyyyMMdd
    val createSourceTableSql: String =
      """
        |create table KafkaSourceTable(
        | vin String,
        | tid String,
        | source String,
        | datas String,
        | ts as substr(concat('20', tid), 1, 8)
        | ) WITH (
        | 'connector' = 'kafka',
        | 'topic' = 'HW_BIGDATA',
        | 'properties.bootstrap.servers' = 'bs:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'json.ignore-parse-errors' = 'true',
        | 'scan.startup.mode' = 'group-offsets'
        |)
        """.stripMargin
    tEnv.executeSql(createSourceTableSql)

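    // HBase sink table: maps to the physical table gdc_dev_test:mytable; all payload columns go into column family cf1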
    val createSinkTableSql: String =
      """
        |CREATE TABLE mytable (
        | rowkey String,
        | cf1 ROW<tid String, source String, datas String, ts String>,
        | PRIMARY KEY (rowkey) NOT ENFORCED
        |) WITH (
        | 'connector' = 'hbase-2.2',
        | 'table-name' = 'gdc_dev_test:mytable',
        | 'zookeeper.quorum' = 'zk:2181'
        |)
        |""".stripMargin
    log.info(s"createSinkTableSql: $createSinkTableSql")
    tEnv.executeSql(createSinkTableSql)


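    // vin becomes the HBase rowkey; the remaining fields are packed into a ROW for cf1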
    val insertSql: String =
      """
        |INSERT INTO mytable
        |SELECT vin, ROW(tid, source, datas, ts) FROM KafkaSourceTable
        |""".stripMargin

    log.info(s"insertSql: $insertSql")
    tEnv.executeSql(insertSql)

  }
}

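To smoke-test the pipeline end to end, a message can be pushed with the Kafka console producer. The payload below is hypothetical; the broker and topic names come from the source DDL, and the tid prefix 210801 makes the computed ts column evaluate to 20210801:

echo '{"vin":"TESTVIN0000000001","tid":"210801120000123","source":"gw","datas":"{}"}' | kafka-console-producer --broker-list bs:9092 --topic HW_BIGDATA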

Screenshots

Flink on YARN:
[Screenshot: the job running on Flink on YARN]
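
For reference, a submission sketch (the jar name is hypothetical; -t yarn-per-job targets YARN in Flink 1.13):

flink run -t yarn-per-job -c FlinkKafkaHbase flink-kafka-hbase-demo.jar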

HBase data

[Screenshot: rows written to gdc_dev_test:mytable]
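
The same rows can be checked from the HBase shell, e.g.:

scan 'gdc_dev_test:mytable', {LIMIT => 2}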
