Background:
Use Flink SQL to consume streaming data from Kafka, transform it, and write the results back to Kafka.
Flink version: 1.13.0
Sample data:
{"streamTaskId":"1307"
,"channel":"aa"
,"instanceId":"66996"
,"jobName":"flinksqldemo"
,"jobId":"1231213432432423af"
,"timestamp":1630484255363}
Source table:
String sourcecheckpoint = "CREATE TABLE sourcecheckpoint(\n" +
"streamTaskId STRING,\n" +
"channel STRING,\n" +
"instanceId STRING,\n" +
"jobName STRING,\n" +
"jobId STRING,\n" +
"`timestamp` BIGINT,\n" +
"ts AS TO_TIMESTAMP(FROM_UNIXTIME(`timestamp` / 1000, 'yyyy-MM-dd HH:mm:ss')),\n" +
"WATERMARK FOR ts AS ts - INTERVAL '3' SECOND\n" +
") WITH(\n" +
"'connector'='kafka',\n" +
"'properties.group.id'='stream_test_in',\n" +
"'properties.bootstrap.servers'='test.****.com:9092',\n" +
"'topic'='stream-test-topic',\n" +
"'scan.startup.mode' = 'latest-offset',\n" +
"'format'='json'\n" +
")";
Sink table:
String sink = "CREATE TABLE print_table (\n" +
" streamTaskId STRING,\n" +
" streamTaskNum BIGINT,\n" +
" `endTime` TIMESTAMP\n" +
") WITH (\n" +
"'connector'='kafka',\n" +
"'properties.group.id'='stream_test_out',\n" +
"'properties.bootstrap.servers'='test.*****.com:9092',\n" +
"'topic'='stream-metric-test',\n" +
"'format'='json'\n" +
")";
2. Operations
1. groupBy
String groupbyStr = "INSERT INTO print_table SELECT\n" +
" streamTaskId, count(streamTaskId) as streamTaskNum, TUMBLE_END(ts, INTERVAL '10' SECOND) as endTime FROM sourcecheckpoint\n" +
" GROUP BY streamTaskId, TUMBLE(ts, INTERVAL '10' SECOND)";
2. join
String joinStr = "INSERT INTO print_table SELECT a.streamTaskId, b.channel FROM sourcecheckpoint1 a INNER JOIN sourcecheckpoint2 b ON a.streamTaskId = b.streamTaskId";
Note: this statement assumes two source tables, sourcecheckpoint1 and sourcecheckpoint2, registered with DDL like sourcecheckpoint above. Its select list (streamTaskId, channel) also does not match the schema of print_table, so in practice it needs a sink with matching columns, as sketched below.
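A sink whose schema matches the join output could look like this (a sketch; the table name join_sink and the topic stream-join-test are hypothetical):
String joinSink = "CREATE TABLE join_sink (\n" +
" streamTaskId STRING,\n" +
" channel STRING\n" +
") WITH (\n" +
"'connector'='kafka',\n" +
"'properties.bootstrap.servers'='test.*****.com:9092',\n" +
"'topic'='stream-join-test',\n" +
"'format'='json'\n" +
")";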
More operations to be added…
3. Main function
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class flinkSQLkafkaToKafka {
public static void main(String[] args) {
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(bsEnv, bsSettings);
// Register the source and sink tables using the DDL strings defined above.
// tableEnvironment.executeSql(sourcejobinfo); // DDL for an additional source table, not shown in this post
tableEnvironment.executeSql(sourcecheckpoint);
tableEnvironment.executeSql(sink);
// executeSql on an INSERT statement submits the streaming job by itself;
// calling bsEnv.execute() here is unnecessary and would fail, because
// no DataStream operators were defined on the environment.
TableResult tableResult = tableEnvironment.executeSql(groupbyStr);
}
}
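To run the groupBy and join INSERT statements inside a single job, Flink's statement set API can bundle them instead of issuing separate executeSql calls (a sketch, assuming all referenced tables are registered; requires importing org.apache.flink.table.api.StatementSet):
StatementSet statementSet = tableEnvironment.createStatementSet();
statementSet.addInsertSql(groupbyStr);
statementSet.addInsertSql(joinStr);
// submits one job containing both INSERT pipelines
statementSet.execute();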
4. POM dependencies
<properties>
<flink.version>1.13.0</flink.version>
<target.java.version>1.8</target.java.version>
<scala.binary.version>2.11</scala.binary.version>
<maven.compiler.source>${target.java.version}</maven.compiler.source>
<maven.compiler.target>${target.java.version}</maven.compiler.target>
<log4j.version>2.12.1</log4j.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<!-- <scope>compile</scope>-->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
....
....
</dependencies>
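The Kafka tables declare 'format'='json', which is provided by the flink-json artifact; if it is not already among the elided dependencies above, it needs to be added (a sketch):
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>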