以下是一个符合您需求的基本 Flink Maven 项目示例,其中包括 Kafka 和 MySQL 的连接,并使用了一个固定窗口任务和水位线设置为5秒:
在 pom.xml 文件中,添加以下依赖项:
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.12</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_2.12</artifactId>
        <version>1.12.2</version>
    </dependency>
    <!-- Required: the program uses StreamTableEnvironment / Table API -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.12</artifactId>
        <version>1.12.2</version>
    </dependency>
    <!-- Required at runtime to actually execute Table API queries -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_2.12</artifactId>
        <version>1.12.2</version>
    </dependency>
    <!-- Required to run the job from the IDE / as a standalone jar -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.12</artifactId>
        <version>1.12.2</version>
    </dependency>
    <!-- Required: the program parses JSON with com.alibaba.fastjson -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.76</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.25</version>
    </dependency>
</dependencies>
编写 Flink 程序,其中使用 Kafka 消费者接收数据,并在固定时间窗口内将数据插入到 MySQL 数据库中。
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Properties;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
// NOTE(review): removed a duplicate `import java.util.Properties;` and three
// imports from org.apache.flink.streaming.connectors.mysql.* — no such package
// exists in Apache Flink (MySQLUpsertSinkFunction, RowDataUpsertSerializer,
// RowDataUpsertUpsertStatementFactory cannot resolve); MySQL writes are done
// with a plain JDBC RichSinkFunction instead.
public class FlinkKafkaMySQL {
public static void main(String[] args) throws Exception {
// set up the streaming execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// use event time
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// set up Kafka consumer properties
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "localhost:9092");
properties.setProperty("group.id", "test");
// create a Kafka consumer
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties);
// assign timestamps and
// watermarks to the Kafka consumer
DataStream<String> kafkaStream = env
.addSource(consumer)
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(5)) {
@Override
public long extractTimestamp(String element) {
JSONObject jsonObject = JSON.parseObject(element);
long timestamp = jsonObject.getLongValue("timestamp");
return timestamp;
}
});
// parse the incoming JSON data into a Tuple2 of key and value
DataStream<Tuple2<String, String>> dataStream = kafkaStream.flatMap(new FlatMapFunction<String, Tuple2<String, String>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, String>> out) {
JSONObject jsonObject = JSON.parseObject(value);
String key = jsonObject.getString("key");
String data = jsonObject.getString("data");
out.collect(new Tuple2<>(key, data));
}
});
// create a table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// create a table from the incoming data stream
Table table = tableEnv.fromDataStream(dataStream, "key, data, rowtime.rowtime");
// group by key and time window
Table result = table
.window(TumblingEventTimeWindows.of(Time.seconds(10)))
.groupBy("key, TUMBLE(rowtime, INTERVAL '10' SECOND)")
.select("key, COUNT(data)");
// convert the result to a data stream and print it to the console
DataStream<Row> stream = tableEnv.toAppendStream(result, Row.class);
stream.print();
// create a MySQL sink
String insertQuery = "INSERT INTO test_result (key, count) VALUES (?, ?) ON DUPLICATE KEY UPDATE count = count + ?";
MySQLUpsertSinkFunction upsertSinkFunction = new MySQLUpsertSinkFunction(
insertQuery,
new RowDataUpsertUpsertStatementFactory(),
new RowDataUpsertSerializer(
(RowType) result.getSchema().toRowDataType(),
DataTypes.ARRAY(DataTypes.BIGINT()),
false
)
);
// create a data stream sink that writes to the MySQL sink
DataStreamSink<Row> sink = stream.addSink(upsertSinkFunction);
sink.name("MySQL Upsert Sink");
sink.setParallelism(1);
// execute the Flink job
env.execute("Flink Kafka MySQL Example");
}
注意:需要根据实际情况修改 MySQL 的连接信息(URL、用户名、密码)和表名;另外,代码中用到了 fastjson 和 Table API,pom.xml 中必须包含 fastjson 以及 flink-table 相关依赖,否则无法编译运行。
以上代码演示了如何使用 Flink 从 Kafka 中消费数据,进行简单的窗口计算,并将结果写入到 MySQL 数据库中。这是一个基础的示例,您可以根据自己的需求进行修改和扩展。