// Flink 1.12 SQL application.
// 1. Join the real-time MySQL order stream with dimension data updated through Kafka,
//    enrich the orders with the dimension fields, then write the result to the DWD-layer Kafka topic.
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.ScalarFunction;
import pojo.ShopingEntity;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
public class MysqlJoinKafkaToKafka {

    public static void main(String[] args) throws Exception {
        // Initialize the Flink streaming and table environments.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment);
        environment.setParallelism(1);

        // Register the UDF defined in THIS class. The original code registered
        // TestConnect.DateFormatFunc, leaving the local DateFormatFunc below unused.
        tableEnvironment.createTemporarySystemFunction("dateFormatUDF", new DateFormatFunc());

        // Kafka source table holding the product ("shoping") dimension data.
        // NOTE(review): this list says 'hadoop-3:9092' while other tables in this file
        // use 'hadoop3' / 'dev-hadoop3' — confirm the real broker hostnames.
        tableEnvironment.executeSql("CREATE TABLE dim_shoping (\n" +
                "  shoping_id INT,\n" +
                "  shoping_name STRING,\n" +
                "  shoping_hot INT\n" +
                ") WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = 'dim_shoping',\n" +
                "  'scan.startup.mode' = 'earliest-offset',\n" +
                "  'properties.bootstrap.servers' = 'hadoop1:9092,hadoop2:9092,hadoop-3:9092',\n" +
                "  'properties.group.id' = 'testgroup',\n" +
                "  'format' = 'json'\n" +
                ")");

        // JDBC table over the real-time MySQL order data.
        tableEnvironment.executeSql("CREATE TABLE shoping_orders (\n" +
                "  id BIGINT,\n" +
                "  market_id INT,\n" +
                "  business_id STRING,\n" +
                "  order_id STRING,\n" +
                "  shoping_id STRING,\n" +
                "  shoping_amount DECIMAL(11,4),\n" +
                "  create_time TIMESTAMP,\n" +
                "  update_time TIMESTAMP,\n" +
                "  PRIMARY KEY(id) NOT ENFORCED\n" +
                ") WITH (\n" +
                "  'connector' = 'jdbc',\n" +
                "  'url' = 'jdbc:mysql://localhost:3306/shoping',\n" +
                "  'username' = 'hadoop',\n" +
                "  'password' = 'hadoop',\n" +
                "  'table-name' = 'shoping_orders',\n" +
                "  'driver' = 'com.mysql.jdbc.Driver')");

        // DWD-layer Kafka sink table. Rewritten from the legacy 'connector.type'
        // descriptor (which also required an 'update-mode' option that was missing)
        // to the new Flink 1.12 connector options, consistent with the source tables.
        // NOTE(review): 'dev-hadoop3' differs from the hostnames above — confirm.
        tableEnvironment.executeSql("CREATE TABLE dwd_shoping (\n" +
                "  id BIGINT,\n" +
                "  market_id INT,\n" +
                "  business_id STRING,\n" +
                "  order_id STRING,\n" +
                "  shoping_id STRING,\n" +
                "  shoping_amount DECIMAL(11,4),\n" +
                "  shoping_hot INT,\n" +
                "  create_time STRING,\n" +
                "  update_time STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = 'dwd_shoping',\n" +
                "  'properties.bootstrap.servers' = 'hadoop1:9092,hadoop2:9092,dev-hadoop3:9092',\n" +
                "  'format' = 'json'\n" +
                ")");

        // Enrich orders with dimension attributes via a left outer join.
        // Fixes vs. original:
        //  - shoping_amount was aliased 'order_amount', breaking the POJO field
        //    mapping and the downstream INSERT that selects 'shoping_amount';
        //  - the join compared STRING shoping_id with INT shoping_id — cast the
        //    dimension key to STRING explicitly.
        String queryKafkaSQL = "SELECT " +
                "rel.id AS id,\n" +
                "rel.market_id AS market_id,\n" +
                "rel.business_id AS business_id,\n" +
                "rel.order_id AS order_id,\n" +
                "rel.shoping_id AS shoping_id,\n" +
                "rel.shoping_amount AS shoping_amount,\n" +
                "dim.shoping_hot AS shoping_hot,\n" +
                "dateFormatUDF(rel.create_time, 'yyyy-MM-dd HH:mm:ss') AS create_time,\n" +
                "dateFormatUDF(rel.update_time, 'yyyy-MM-dd HH:mm:ss') AS update_time\n" +
                "FROM shoping_orders AS rel\n" +
                "LEFT OUTER JOIN dim_shoping AS dim\n" +
                "ON rel.shoping_id = CAST(dim.shoping_id AS STRING)";

        // The join result is updating, so it cannot be written to an append-only
        // Kafka sink directly. Convert to a retract stream, keep only the add
        // messages, and convert back to a table.
        Table sqlQuery = tableEnvironment.sqlQuery(queryKafkaSQL);
        DataStream<Tuple2<Boolean, ShopingEntity>> toRetractStream =
                tableEnvironment.toRetractStream(sqlQuery, ShopingEntity.class);

        // f0 == true  -> add/insert message, keep it;
        // f0 == false -> retraction message. The original mapped ALL messages,
        // re-emitting retractions as data rows — filter them out.
        SingleOutputStreamOperator<ShopingEntity> shopingEntityOutputStream =
                toRetractStream.filter(r -> r.f0).map(r -> r.f1);

        // Expose the append-only stream as a temporary view for the final insert.
        Table resultTable = tableEnvironment.fromDataStream(shopingEntityOutputStream);
        tableEnvironment.createTemporaryView("resultTable", resultTable);

        // Write the enriched rows into the DWD Kafka topic.
        String insertKafkaSQL = "INSERT INTO dwd_shoping SELECT " +
                "id,\n" +
                "market_id,\n" +
                "business_id,\n" +
                "order_id,\n" +
                "shoping_id,\n" +
                "shoping_amount,\n" +
                "shoping_hot,\n" +
                "create_time,\n" +
                "update_time\n" +
                "FROM resultTable";
        tableEnvironment.executeSql(insertKafkaSQL);
    }

    /**
     * Scalar UDF that formats a {@link Timestamp} (interpreted as UTC) into the given
     * pattern in the UTC+8 zone.
     *
     * <p>Null-safe: returns {@code null} when the timestamp or pattern is {@code null},
     * which can happen for rows produced by the left outer join above. The original
     * implementation would throw a NullPointerException in that case.
     */
    public static class DateFormatFunc extends ScalarFunction {
        public String eval(Timestamp timestamp, String format) {
            if (timestamp == null || format == null) {
                return null;
            }
            return timestamp.toLocalDateTime()
                    .atZone(ZoneId.of("UTC"))
                    .withZoneSameInstant(ZoneId.of("+08:00"))
                    .format(DateTimeFormatter.ofPattern(format));
        }
    }
}