- Add the Flink and Kafka dependencies
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <flink.version>1.13.0</flink.version>
    <java.version>1.8</java.version>
    <scala.binary.version>2.12</scala.binary.version>
    <slf4j.version>1.7.30</slf4j.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.springframework.kafka</groupId>
        <artifactId>spring-kafka</artifactId>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.1.18</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-cep_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-csv</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-beanutils/commons-beanutils -->
    <dependency>
        <groupId>commons-beanutils</groupId>
        <artifactId>commons-beanutils</artifactId>
        <version>1.9.4</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
    </dependency>
    <!-- Core Flink dependencies -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
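Note that spring-kafka, spring-boot-starter-web, spring-boot-starter-test and lombok carry no explicit version, so the pom must inherit dependency management from a Spring Boot parent that the snippet above omits. A typical declaration would look like the following; the exact version number is an assumption, any Spring Boot 2.x release whose managed spring-kafka is compatible will do:
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <!-- assumed version; not given in the original -->
    <version>2.5.6</version>
</parent>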
- Create the corresponding Kafka producer
Before the Kafka consumer starts listening to the message queue, it is best to make sure the topic already exists, for example by declaring it up front as sketched below.
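One way to guarantee that is to declare the topics as beans; a minimal sketch using Spring Kafka's TopicBuilder (the config class and the partition/replica counts are assumptions not shown in the original; two partitions match the listener further below, which reads partitions 0 and 1):
import org.apache.kafka.clients.admin.NewTopic;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.config.TopicBuilder;

@Configuration
public class KafkaTopicConfig {

    // Spring Boot's auto-configured KafkaAdmin creates any NewTopic beans
    // on startup if the topics do not exist yet.
    @Bean
    public NewTopic clickTable() {
        return TopicBuilder.name("clickTable").partitions(2).replicas(1).build();
    }

    @Bean
    public NewTopic clickWindowRes() {
        return TopicBuilder.name("clickWindowRes").partitions(2).replicas(1).build();
    }
}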
This controller accepts GET requests and sends the data to the Kafka topic clickTable.
@SuppressWarnings("all")
@RestController
public class KafkaProducer {

    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    @GetMapping("send")
    public String sendMsg() {
        Random random = new Random();
        String[] users = {"Bob", "John", "Alice"};
        String[] pages = {"/user", "/prod", "/cart"};
        // Serialize as JSON: the Flink source below parses the payload with
        // JSON.parseObject, so a plain toString() would not round-trip.
        kafkaTemplate.send("clickTable", JSON.toJSONString(
                new ClickEvent(users[random.nextInt(users.length)],
                        pages[random.nextInt(pages.length)])));
        return "ok";
    }
}
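The ClickEvent POJO itself is not shown in the original. A minimal sketch consistent with how it is used elsewhere (public ts field read by the timestamp assigner, a two-argument constructor, fastjson round-tripping, and the Serializable pitfall noted at the end) could be:
import java.io.Serializable;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor // fastjson needs a no-arg constructor to deserialize
public class ClickEvent implements Serializable {

    public String userId;
    public String pageId;
    public long ts; // event time, used as the rowtime attribute in the Flink job

    public ClickEvent(String userId, String pageId) {
        this.userId = userId;
        this.pageId = pageId;
        // assumption: the timestamp is taken at creation time
        this.ts = System.currentTimeMillis();
    }
}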
- Flink computation job
The Flink job aggregates the data read from Kafka and writes the results back to another Kafka topic.
@Component
public class FlinkWindowCompute implements InitializingBean, Serializable {

    @Override
    public void afterPropertiesSet() throws Exception {
        // Starting the job on a separate thread is critical: the Flink program that
        // consumes from Kafka blocks forever (effectively a while(true) loop).
        // Run synchronously, it would pin the main thread here, the Spring Boot
        // application would never finish starting, and HTTP endpoints would fail.
        new Thread(() -> {
            try {
                startCompute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();
    }
    public SinkFunction<String> getSink() {
        Properties properties = new Properties();
        // For the producer side only the broker address is required; consumer
        // settings (group.id, deserializers, auto.offset.reset) do not apply here.
        properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
        return new FlinkKafkaProducer<>("clickWindowRes", new SimpleStringSchema(), properties);
    }
    public SourceFunction<ClickEvent> getSourceKafka() {
        Properties properties = new Properties();
        // Required Kafka connection properties. Key/value deserializers need not be
        // set here: FlinkKafkaConsumer handles byte[] deserialization internally and
        // delegates to the KafkaDeserializationSchema below.
        properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
        properties.setProperty("group.id", "consumer-group");
        properties.setProperty("auto.offset.reset", "latest");
        return new FlinkKafkaConsumer<>("clickTable",
                new KafkaDeserializationSchema<ClickEvent>() {
                    @Override
                    public boolean isEndOfStream(ClickEvent clickEvent) {
                        return false;
                    }

                    @Override
                    public ClickEvent deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception {
                        // The payload was produced with JSON.toJSONString, so parse it back
                        return JSON.parseObject(new String(record.value()), ClickEvent.class);
                    }

                    @Override
                    public TypeInformation<ClickEvent> getProducedType() {
                        return TypeInformation.of(ClickEvent.class);
                    }
                }, properties);
    }
    private void startCompute() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setAutoWatermarkInterval(200);
        // For local testing without Kafka, getSourceKafka() can be swapped for an
        // in-memory SourceFunction that emits a random ClickEvent every second.
        SingleOutputStreamOperator<ClickEvent> operator = env.addSource(getSourceKafka())
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<ClickEvent>forBoundedOutOfOrderness(Duration.ZERO)
                                .withTimestampAssigner(new SerializableTimestampAssigner<ClickEvent>() {
                                    @Override
                                    public long extractTimestamp(ClickEvent clickEvent, long l) {
                                        return clickEvent.ts;
                                    }
                                })
                );
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        Table clickTable = tableEnv.fromDataStream(operator,
                $("userId"), $("pageId"), $("ts").rowtime());
        // register the table as a view
        tableEnv.createTemporaryView("clickTable", clickTable);
        // run the aggregation SQL
        Table tableRes = tableEnv.sqlQuery(
                "select userId, count(*) as cnt from clickTable group by userId");
        // for local debugging: tableEnv.toChangelogStream(tableRes).print("result:");
        tableEnv.toChangelogStream(tableRes)
                .map(String::valueOf)
                .addSink(getSink());
        env.execute();
    }
}
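Since the query is a continuously updating group-by aggregation, toChangelogStream emits insert and retraction records, and String::valueOf keeps Flink's row-kind prefix. The strings landing in clickWindowRes therefore look roughly like this (illustrative values):
+I[Bob, 1]
-U[Bob, 1]
+U[Bob, 2]
+I marks the first result for a key, and -U/+U is the retract/update pair emitted whenever a count changes, so downstream consumers must interpret these changelog semantics rather than treat every record as an independent final value.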
- Kafka consumer listening for the results
Consume the data computed by Flink for further storage or processing.
// topics and topicPartitions are mutually exclusive on @KafkaListener,
// so only topicPartitions is specified here.
@KafkaListener(topicPartitions = {
        @TopicPartition(topic = "clickWindowRes", partitions = {"0", "1"})
})
public void clickWindowResKafkaConsumer(
        ConsumerRecord<String, String> record, Acknowledgment ack) {
    String value = record.value();
    System.out.println("Kafka consumed result: " + value);
    ack.acknowledge();
}
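Injecting an Acknowledgment parameter only works when the listener container is configured for manual acknowledgment. A minimal application.yml sketch under that assumption (broker address and group id taken from the Flink code above):
spring:
  kafka:
    bootstrap-servers: 127.0.0.1:9092
    consumer:
      group-id: consumer-group
      enable-auto-commit: false
    listener:
      ack-mode: manual_immediate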
- Pitfalls
- Classes touched by the Flink and Kafka code paths should implement Serializable, otherwise Flink throws a NotSerializableException when it distributes the job.
- The source passed to addSource collects data in an endless while(true) loop, so executing the job blocks the calling thread and never returns. Started on the main thread, it would prevent the Spring Boot application from finishing startup, which is why the job is launched asynchronously on a dedicated thread.