思路解析
1. 在 HBase 中创建目标表 hbase-table
2. 代码中注册 HbaseSinkTable,关联 HBase 中的 hbase-table
3. 代码中注册 KafkaSourceTable(Kafka 源表)
4. 代码中执行 insert ... select SQL,完成从 Kafka 到 HBase 的写入。
参考文章
版本
HBase 2.1.0-cdh6.1.1
pom文件
文件中有一些没有用的依赖,可以自己剔除。
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<flink.version>1.13.1</flink.version>
<!-- Scala binary version, used as the artifact suffix (_2.11) on Flink artifacts. -->
<scala.version>2.11</scala.version>
<!-- <hive.version>2.1.1</hive.version>-->
<hive.version>2.1.1-cdh6.1.1</hive.version>
<!-- <hadoop.version>3.0.0</hadoop.version>-->
<hadoop.version>3.0.0-cdh6.1.1</hadoop.version>
<!-- SECURITY: 2.8.2 is vulnerable to Log4Shell (CVE-2021-44228 and follow-ups);
     2.17.1 is the patched Java-8-compatible release. -->
<log4j.version>2.17.1</log4j.version>
<!-- SECURITY: 1.2.7 (2015) is affected by the well-known fastjson autoType
     deserialization RCEs; 1.2.83 is the patched 1.2.x release. -->
<fastjson.version>1.2.83</fastjson.version>
</properties>
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!--<!– <artifactId>flink-sql-connector-hive-${hive.version}_${scala.version}</artifactId>–>-->
<!-- <artifactId>flink-sql-connector-hive-2.2.0_${scala.version}</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<!-- <repository>-->
<!-- <id>spring-plugin</id>-->
<!-- <url>https://repo.spring.io/plugins-release/</url>-->
<!-- </repository>-->
</repositories>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.19</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<!-- Apache Flink dependencies -->
<!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!-- <artifactId>flink-scala_2.11</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} so the Scala suffix stays consistent with the other
     Flink artifacts in this POM (e.g. flink-streaming-scala, flink-clients). -->
<artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} for a consistent Scala suffix across Flink artifacts. -->
<artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!-- <artifactId>flink-streaming-scala_${scala.version}</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
<!-- <scope>provided</scope>-->
</dependency>
<!-- Flink SQL dependencies -->
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!--<!– <artifactId>flink-table-api-java-bridge_2.11</artifactId>–>-->
<!-- <artifactId>flink-table-api-scala-bridge_2.11</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!-- <artifactId>flink-table-planner_${scala.version}</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- <exclusions>-->
<!-- <exclusion>-->
<!-- <artifactId>slf4j-api</artifactId>-->
<!-- <groupId>org.slf4j</groupId>-->
<!-- </exclusion>-->
<!-- </exclusions>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/com.ibm/com.ibm.icu -->
<!-- https://mvnrepository.com/artifact/com.ibm/com.ibm.icu -->
<!-- <dependency>-->
<!-- <groupId>com.ibm</groupId>-->
<!-- <artifactId>com.ibm.icu</artifactId>-->
<!-- <version>3.6.1.v20070906</version>-->
<!-- </dependency>-->
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} for a consistent Scala suffix across Flink artifacts. -->
<artifactId>flink-table-planner-blink_${scala.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-runtime-blink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-runtime-blink_${scala.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!-- <artifactId>flink-streaming-scala_2.11</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/org.apache.calcite/calcite-core -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- Add connector dependencies here. They must be in the default scope (compile). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-sql-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hbase-2.2_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} for a consistent Scala suffix across Flink artifacts. -->
<artifactId>flink-connector-hive_${scala.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<!-- <version>2.1.1-cdh6.1.1</version>-->
<exclusions>
<exclusion>
<artifactId>calcite-avatica</artifactId>
<groupId>org.apache.calcite</groupId>
</exclusion>
<exclusion>
<artifactId>calcite-core</artifactId>
<groupId>org.apache.calcite</groupId>
</exclusion>
<exclusion>
<artifactId>calcite-linq4j</artifactId>
<groupId>org.apache.calcite</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
</dependency>
<!--<dependency>-->
<!--<groupId>org.apache.flink</groupId>-->
<!--<artifactId>flink-shaded-hadoop-3</artifactId>-->
<!--<version>3.1.1.7.1.1.0-565-9.0</version>-->
<!--<scope>provided</scope> -->
<!--</dependency>-->
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} for a consistent Scala suffix across Flink artifacts. -->
<artifactId>flink-parquet_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-orc_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- NOTE(review): flink-sql-connector-kafka (the shaded fat jar declared earlier in
     this POM) already bundles this connector; keeping both can lead to duplicate
     classes / classloading conflicts at runtime. Confirm one of the two can be
     dropped — the SQL demo below only needs the sql-connector variant. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-base</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.kafka</groupId>-->
<!-- <artifactId>kafka-clients</artifactId>-->
<!-- <version>1.0.1</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.projectlombok</groupId>-->
<!-- <artifactId>lombok</artifactId>-->
<!-- <version>1.16.18</version>-->
<!-- </dependency>-->
<!-- Add logging framework, to produce console output when running in the IDE.-->
<!-- These dependencies are excluded from the application JAR by default.-->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<!-- <dependency>-->
<!-- <groupId>com.alibaba</groupId>-->
<!-- <artifactId>fastjson</artifactId>-->
<!-- <version>1.2.72</version>-->
<!-- </dependency>-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<!-- SECURITY: 5.1.46 has known CVEs; 5.1.49 is the final drop-in 5.1.x release. -->
<version>5.1.49</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- Use ${scala.version} for a consistent Scala suffix across Flink artifacts. -->
<artifactId>flink-connector-jdbc_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!--<dependency>-->
<!--<groupId>org.apache.flink</groupId>-->
<!--<artifactId>flink-shaded-hadoop-2-uber</artifactId>-->
<!--<version>${flink-shaded-hadoop.version}</version>-->
<!--</dependency>-->
</dependencies>
Scala demo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.entrypoint.ClusterEntrypoint.ExecutionMode
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.table.api._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
/**
 * Streams JSON records from a Kafka topic into an HBase table with Flink SQL.
 *
 * Pipeline: register a Kafka source table, register an HBase sink table,
 * then run a single INSERT ... SELECT to connect the two.
 *
 * @author jhon_yh
 */
// Target HBase version: 2.1.0-cdh6.1.1
// NOTE(review): the `Logging` mixin (providing `log`) is not imported in this file —
// presumably a project/SLF4J logging trait; confirm the import.
object FlinkKafkaHbase extends Logging {
  def main(args: Array[String]): Unit = {
    // Streaming environment with exactly-once checkpoints every 60 seconds,
    // persisted to HDFS so the Kafka offsets / HBase writes can recover.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(1000L * 60, CheckpointingMode.EXACTLY_ONCE)
    env.getCheckpointConfig.setCheckpointStorage("hdfs://ns/test/yh/checkpoint/hbase/")

    // Blink planner table environment on top of the streaming environment.
    val settings = EnvironmentSettings.newInstance().useBlinkPlanner().build()
    val tEnv = StreamTableEnvironment.create(env, settings)

    tEnv.getConfig.setSqlDialect(SqlDialect.DEFAULT)
    tEnv.executeSql("drop table if exists KafkaSourceTable")

    // Kafka source table. `ts` is computed from `tid`: prefix "20" then take the
    // first 8 chars — presumably a yyyyMMdd date; confirm tid's format upstream.
    // BUG FIX: the original DDL declared 'scan.startup.mode' twice
    // ('earliest-offset' and 'group-offsets'); duplicate options fail Flink's DDL
    // validation. Keep 'group-offsets' (resume from the group's committed offsets).
    val createSourceTableSql: String =
      """
        |create table KafkaSourceTable(
        | vin String,
        | tid String,
        | source String,
        | datas String,
        | ts as substr(concat('20',tid),1,8 )
        | ) WITH (
        | 'connector' = 'kafka',
        | 'topic' = 'HW_BIGDATA',
        | 'properties.bootstrap.servers' = 'bs:9092',
        | 'properties.group.id' = 'flink-test-group',
        | 'format' = 'json',
        | 'json.ignore-parse-errors' = 'true',
        | 'scan.startup.mode' = 'group-offsets'
        |)
      """.stripMargin
    tEnv.executeSql(createSourceTableSql)

    // HBase sink table: one rowkey column plus a single column family `cf1`
    // carrying the remaining fields.
    val createSinkTableSql: String =
      """
        |CREATE TABLE mytable (
        | rowkey String,
        | cf1 ROW<tid String, source String, datas String, ts String>,
        | PRIMARY KEY (rowkey) NOT ENFORCED
        |) WITH (
        | 'connector' = 'hbase-2.2',
        | 'table-name' = 'gdc_dev_test:mytable',
        | 'zookeeper.quorum' = 'zk:2181'
        |)
        |""".stripMargin
    log.info(s"createSinkTableSql: $createSinkTableSql")
    tEnv.executeSql(createSinkTableSql)

    // `vin` becomes the HBase rowkey; the other columns are packed into cf1.
    val insertSql: String =
      """
        |INSERT INTO mytable
        |SELECT vin, ROW(tid, source, datas, ts) FROM KafkaSourceTable
        |""".stripMargin
    log.info(s"insertSql: $insertSql")
    tEnv.executeSql(insertSql)
  }
}
运行效果图
flink on yarn