Flink Table API: Reading from and Writing to HBase
1. Obtain the table environment
Using batch mode as an example:
// Batch execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Table environment
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inBatchMode()
.build();
TableEnvironment tableEnv = TableEnvironment.create(settings);
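The snippet above builds a pure batch TableEnvironment (the unused ExecutionEnvironment is only kept for reference). If the same HBase tables should instead be processed in streaming mode, a minimal sketch of the equivalent setup looks like this; StreamTableEnvironment from org.apache.flink.table.api.bridge.java is an addition not used elsewhere in this post:
// Streaming variant: bind the table environment to a StreamExecutionEnvironment
StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings streamSettings = EnvironmentSettings.newInstance()
        .useBlinkPlanner()
        .inStreamingMode()
        .build();
StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv, streamSettings);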
2. Create the HBase table connectors
// Detail (DWD) table
TableResult tableResult = tableEnv.executeSql(
"CREATE TABLE DWD_YDJT_SCALEANDINC_DATE (" +
" rowkey STRING," +
" info ROW<businessDataId STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE, channelSource STRING>," +
" PRIMARY KEY (rowkey) NOT ENFORCED" +
" ) WITH (" +
" 'connector' = 'hbase-2.2' ," +
" 'table-name' = 'DWD_YDJT_SCALEANDINC_DATE' ," +
" 'zookeeper.quorum' = 'linux121:2181,linux122:2181,linux123:2181'" +
" )");
// Aggregate (DWS) table
TableResult dwsTableResult = tableEnv.executeSql(
"CREATE TABLE DWS_YDJT_SCALEANDINC_DATE (" +
" rowkey STRING," +
" info ROW< channelSource STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE>," +
" PRIMARY KEY (rowkey) NOT ENFORCED" +
" ) WITH (" +
" 'connector' = 'hbase-2.2' ," +
" 'table-name' = 'DWS_YDJT_SCALEANDINC_DATE' ," +
" 'zookeeper.quorum' = 'linux121:2181'" +
" )");
Note: 'connector' = 'hbase-2.2' is not strict about the exact HBase version you run; see the official documentation:
https://ci.apache.org/projects/flink/flink-docs-release-1.13/zh/docs/connectors/table/overview/
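Note also that the connector does not create the HBase tables: they must already exist, with a column family whose name matches the ROW field in the DDL (info here). A minimal sketch for creating them with the HBase 2.x Java client; the ZooKeeper hosts come from the DDL above, everything else is an assumption rather than part of the original post:
// Create the target tables with an "info" column family if they do not exist yet
// (imports: org.apache.hadoop.conf.Configuration, org.apache.hadoop.hbase.*, org.apache.hadoop.hbase.client.*)
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "linux121,linux122,linux123");
conf.set("hbase.zookeeper.property.clientPort", "2181");
try (Connection conn = ConnectionFactory.createConnection(conf);
     Admin admin = conn.getAdmin()) {
    for (String name : new String[]{"DWD_YDJT_SCALEANDINC_DATE", "DWS_YDJT_SCALEANDINC_DATE"}) {
        TableName tableName = TableName.valueOf(name);
        if (!admin.tableExists(tableName)) {
            admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
                    .setColumnFamily(ColumnFamilyDescriptorBuilder.of("info"))
                    .build());
        }
    }
}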

3. Read from HBase, compute, and write back
3.1 Read the HBase detail table and aggregate it with Flink SQL
Read the data and register the result as a temporary view dwsTable (columns nested inside the info ROW are referenced as info.<column>):
Table table = tableEnv.sqlQuery("SELECT TO_BASE64(info.channelSource || info.businessDate) AS rowkey, info.channelSource AS channelSource, info.businessDate AS businessDate, SUM(info.businessIncome) AS businessIncome, SUM(info.businessScale) AS businessScale FROM DWD_YDJT_SCALEANDINC_DATE GROUP BY info.channelSource, info.businessDate");
tableEnv.createTemporaryView("dwsTable", table);
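To spot-check the aggregation before writing anything back to HBase, the intermediate table can be executed and printed locally (a quick sketch; note that this consumes the result):
// Print the aggregated rows to stdout for a quick sanity check
table.execute().print();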
3.2 Write to the HBase aggregate table
// INSERT INTO hTable
// SELECT rowkey, ROW(f1q1), ROW(f2q2, f2q3), ROW(f3q4, f3q5, f3q6) FROM T;
TableResult executeResult = tableEnv.executeSql("insert into DWS_YDJT_SCALEANDINC_DATE " +
"select TO_BASE64(channelSource||businessDate) ,ROW(channelSource,businessDate,businessIncome, businessScale) FROM dwsTable ");
Reference (official documentation): https://ci.apache.org/projects/flink/flink-docs-release-1.13/zh/docs/connectors/table/hbase/#%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8-hbase-%E8%A1%A8
4. pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>flinksqlConnHbase</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<flink.version>1.12.1</flink.version>
<scala.binary.version>2.12</scala.binary.version>
<hive.version>3.1.2</hive.version>
<mysql.version>8.0.19</mysql.version>
<hbase.version>2.4.0</hbase.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.73</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.9.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.9.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.9.2</version>
</dependency>
<!--https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.9.2</version>
</dependency>
<!-- Flink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- HBase -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hbase-2.2_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- Table API -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- csv -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Lombok -->
<!-- <dependency>-->
<!-- <groupId>org.projectlombok</groupId>-->
<!-- <artifactId>lombok</artifactId>-->
<!-- <version>1.18.18</version>-->
<!-- </dependency>-->
</dependencies>
</project>
5. Demo code
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
public class HBaseTest {
public static void main(String[] args) throws Exception {
// Batch execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Table environment
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inBatchMode()
.build();
TableEnvironment tableEnv = TableEnvironment.create(settings);
// Detail (DWD) table
TableResult tableResult = tableEnv.executeSql(
"CREATE TABLE DWD_YDJT_SCALEANDINC_DATE (" +
" rowkey STRING," +
" info ROW<businessDataId STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE, channelSource STRING>," +
" PRIMARY KEY (rowkey) NOT ENFORCED" +
" ) WITH (" +
" 'connector' = 'hbase-2.2' ," +
" 'table-name' = 'DWD_YDJT_SCALEANDINC_DATE' ," +
" 'zookeeper.quorum' = 'linux121:2181,linux122:2181,linux123:2181'" +
" )");
// Aggregate (DWS) table
TableResult dwsTableResult = tableEnv.executeSql(
"CREATE TABLE DWS_YDJT_SCALEANDINC_DATE (" +
" rowkey STRING," +
" info ROW< channelSource STRING, businessDate STRING, businessIncome DOUBLE, businessScale DOUBLE>," +
" PRIMARY KEY (rowkey) NOT ENFORCED" +
" ) WITH (" +
" 'connector' = 'hbase-2.2' ," +
" 'table-name' = 'DWS_YDJT_SCALEANDINC_DATE' ," +
" 'zookeeper.quorum' = 'linux121:2181'" +
" )");
// Check that data can be read from HBase
// Table table = tableEnv.sqlQuery("SELECT rowkey, info FROM DWD_YDJT_SCALEANDINC_DATE");
Table table = tableEnv.sqlQuery("SELECT TO_BASE64(info.channelSource || info.businessDate) AS rowkey, info.channelSource AS channelSource, info.businessDate AS businessDate, SUM(info.businessIncome) AS businessIncome, SUM(info.businessScale) AS businessScale FROM DWD_YDJT_SCALEANDINC_DATE GROUP BY info.channelSource, info.businessDate");
tableEnv.createTemporaryView("dwsTable", table);
// INSERT INTO hTable
// SELECT rowkey, ROW(f1q1), ROW(f2q2, f2q3), ROW(f3q4, f3q5, f3q6) FROM T;
TableResult executeResult = tableEnv.executeSql("insert into DWS_YDJT_SCALEANDINC_DATE " +
"select TO_BASE64(channelSource||businessDate) ,ROW(channelSource,businessDate,businessIncome, businessScale) FROM dwsTable ");
// The result of the query itself (alternative to collecting the INSERT result)
// TableResult executeResult = table.execute();
// Get the query result
CloseableIterator<Row> collect = executeResult.collect();
// Output: after either print() or the Consumer below runs, the result has been consumed, so keep only one of the two
// executeResult.print();
List<DwdYdjtScaleAndIncDate> dwdList = new ArrayList<>();
// Iterate over the collected rows. For the INSERT above, the result only carries the affected-row count;
// the POJO parsing below is only meaningful when collecting a detail query such as
// "SELECT rowkey, info FROM DWD_YDJT_SCALEANDINC_DATE" (see the commented query further up).
collect.forEachRemaining(new Consumer<Row>() {
@Override
public void accept(Row row) {
System.out.println(".......Consumer......... " + row);
if (row.getArity() > 1) {
// Split the stringified info ROW on commas, as in the original demo
String[] infoFields = String.valueOf(row.getField(1)).split(",");
dwdList.add(new DwdYdjtScaleAndIncDate(infoFields[0], infoFields[1], infoFields[2], infoFields[3], infoFields[4]));
}
}
});
System.out.println("................");
for(DwdYdjtScaleAndIncDate um : dwdList){
System.out.println(um);
}
// env.execute() is not needed here: executeSql() already submits and runs the job
}
}
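The demo references a DwdYdjtScaleAndIncDate POJO that is not included in the post. A minimal sketch of what it might look like; the field names are assumed from the info columns of the DWD table, and all fields are kept as String to match the parsing above:
// Hypothetical POJO matching the DWD_YDJT_SCALEANDINC_DATE info columns (assumption, not from the original post)
public class DwdYdjtScaleAndIncDate {
    private final String businessDataId;
    private final String businessDate;
    private final String businessIncome;
    private final String businessScale;
    private final String channelSource;

    public DwdYdjtScaleAndIncDate(String businessDataId, String businessDate,
                                  String businessIncome, String businessScale, String channelSource) {
        this.businessDataId = businessDataId;
        this.businessDate = businessDate;
        this.businessIncome = businessIncome;
        this.businessScale = businessScale;
        this.channelSource = channelSource;
    }

    @Override
    public String toString() {
        return "DwdYdjtScaleAndIncDate{" +
                "businessDataId='" + businessDataId + '\'' +
                ", businessDate='" + businessDate + '\'' +
                ", businessIncome='" + businessIncome + '\'' +
                ", businessScale='" + businessScale + '\'' +
                ", channelSource='" + channelSource + '\'' +
                '}';
    }
}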