简介
学习Flink流处理API对实时数据的处理
主要包括数据的读取【SourceFunction】、处理、输出【SinkFunction】
本次是在springboot环境中搭建的
学习的几个点
1、自定义读取的数据源
2、自定义数据输出的sink
3、springboot如何启动flink environment
4、flink中如何使用spring容器中的bean
效果展示
将flink读取到的数据输出到前端页面
flink读取数据
因为是在SpringBoot环境中搭建flink去读取实时数据,所以第一件事情就是解决在flink环境中如何使用spring容器中的bean。
做法是:继承RichSourceFunction,在其生命周期函数open()中通过ApplicationContext获取spring容器中的bean。
package com.bygones.service.flink;
import com.bygones.mapper.EquipmentPartMapper;
import com.bygones.pojo.EquipmentPart;
import com.bygones.pojo.RealAlarm;
import com.bygones.util.SpringApplicationContextUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Random;
/**
 * Simulated Flink data source that keeps emitting {@link RealAlarm} records.
 *
 * <p>The class extends {@link RichSourceFunction} (instead of only implementing
 * {@code SourceFunction}) so that the {@link #open(Configuration)} lifecycle hook is
 * available for looking up Spring beans.
 */
@Component
public class MakeRealAlarmData extends RichSourceFunction<RealAlarm> {

    // Injecting the mapper with @Autowired did not work here (original author's note),
    // so it is fetched from the Spring ApplicationContext in open() instead.
    // transient: the mapper is a runtime-only handle and must not be serialized
    // together with the function instance.
    private transient EquipmentPartMapper equipmentPartMapper;

    private final Random random = new Random();

    // Cleared by cancel() so that the emit loop in run() can terminate.
    // volatile because cancel() is invoked from a different thread than run().
    private volatile boolean running = true;

    /**
     * Lifecycle hook, executed once before the source starts emitting.
     * Resolves the {@link EquipmentPartMapper} bean from the Spring container.
     *
     * @param parameters Flink configuration (unused)
     * @throws Exception if the bean lookup fails
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ApplicationContext applicationContext = SpringApplicationContextUtil.getApplicationContext();
        equipmentPartMapper = applicationContext.getBean(EquipmentPartMapper.class);
    }

    /**
     * Entered once; loops until {@link #cancel()} is called. On every pass all equipment
     * parts are loaded and one alarm with a Gaussian-distributed value and the current
     * timestamp is emitted per part.
     *
     * @param sourceContext context used to emit records downstream
     * @throws Exception if loading the equipment parts fails
     */
    @Override
    public void run(SourceContext<RealAlarm> sourceContext) throws Exception {
        // NOTE(review): there is no pause between passes, so the mapper is queried
        // back-to-back; consider throttling if that load is not intended.
        while (running) {
            List<EquipmentPart> equipmentPartList = equipmentPartMapper.selectAllEquipmentPart();
            for (EquipmentPart equipmentPart : equipmentPartList) {
                if (!running) {
                    // Stop promptly instead of finishing the current batch after cancellation.
                    break;
                }
                double val = random.nextGaussian();
                sourceContext.collect(new RealAlarm(
                        equipmentPart.getEquipmentCode(),
                        equipmentPart.getEquipmentPartCode(),
                        val,
                        System.currentTimeMillis()));
            }
        }
    }

    /**
     * Signals the emit loop in {@link #run(SourceContext)} to stop.
     */
    @Override
    public void cancel() {
        running = false;
    }
}
flink 从自定义数据源中读取数据
// Register the custom source with the execution environment to obtain the input stream.
DataStream<RealAlarm> inputStream = environment.addSource(makeRealAlarmData);
flink处理数据
目前使用map算子对每一条数据单独进行处理(示例中未做任何转换,原样返回)
// Pass-through mapping: every record is forwarded unchanged.
// Per-record transformation logic would go inside map().
MapFunction<RealAlarm, RealAlarm> passThrough = new MapFunction<RealAlarm, RealAlarm>() {
    @Override
    public RealAlarm map(RealAlarm alarm) throws Exception {
        return alarm;
    }
};
DataStream<RealAlarm> mapStream = inputStream.map(passThrough);
flink输出数据
自定义sink输出
package com.bygones.service.flink;
import com.bygones.pojo.RealAlarm;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.springframework.stereotype.Component;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
/**
 * Flink sink that writes every {@link RealAlarm} into the {@code biz_real_alarm} table
 * through plain JDBC.
 *
 * <p>The connection and the prepared statement are created once in
 * {@link #open(Configuration)} and released in {@link #close()}.
 */
@Component
public class MyJdbcSink extends RichSinkFunction<RealAlarm> {

    // transient: JDBC handles are runtime-only resources and must not be serialized
    // together with the function instance.
    private transient Connection connection;
    private transient PreparedStatement statement;

    /**
     * Opens the JDBC connection and prepares the insert statement.
     *
     * @param parameters Flink configuration (unused)
     * @throws Exception if the connection cannot be established or the statement cannot be prepared
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // NOTE(review): URL and credentials are hard-coded; consider externalizing them.
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/db_springboot_mybatis","root","root");
        statement = connection.prepareStatement("insert into biz_real_alarm(equipment_code,equipment_part_code,alarm_value,alarm_timestamp) values (?,?,?,?)");
    }

    /**
     * Inserts one alarm record.
     *
     * @param value   record to persist
     * @param context sink context (unused)
     * @throws Exception if binding the parameters or executing the insert fails
     */
    @Override
    public void invoke(RealAlarm value, Context context) throws Exception {
        statement.setString(1, value.getEquipmentCode());
        statement.setString(2, value.getEquipmentPartCode());
        statement.setDouble(3, value.getAlarmValue());
        statement.setLong(4, value.getAlarmTimestamp());
        statement.execute();
    }

    /**
     * Releases the statement and the connection.
     *
     * <p>Null-safe so that a failed {@link #open(Configuration)} does not cause a
     * NullPointerException here, and the connection is closed even when closing the
     * statement throws.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            if (statement != null) {
                statement.close();
            }
        } finally {
            statement = null;
            if (connection != null) {
                try {
                    connection.close();
                } finally {
                    connection = null;
                }
            }
        }
    }
}
springboot启动时同时启动flink环境
实现CommandLineRunner接口后,springboot启动时会自动执行其run方法,从而在应用启动的同时启动flink作业
package com.bygones.service.flink;
import com.bygones.pojo.RealAlarm;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
/**
 * Builds and starts the Flink job that simulates a real-time data source which
 * continuously produces data.
 *
 * <p>Timing: the job is started when the application starts, which is achieved by
 * implementing {@link org.springframework.boot.CommandLineRunner}.
 *
 * @author bygones
 */
@Component
public class RealtimeDataSource implements CommandLineRunner {

    // Custom source that generates the simulated alarm records.
    @Autowired
    private MakeRealAlarmData makeRealAlarmData;

    // JDBC sink that receives the processed records.
    @Autowired
    private MyJdbcSink myJdbcSink;

    /**
     * Wires source -> map -> sink and executes the job.
     *
     * <p>NOTE(review): {@code execute} appears to block for as long as this unbounded job
     * runs, so this method would not return to the caller — confirm whether the job
     * should be started on a separate thread instead.
     *
     * @param args command-line arguments passed to the application (unused)
     * @throws Exception if the job cannot be built or its execution fails
     */
    @Override
    public void run(String... args) throws Exception {
        System.out.println("传感器数据模拟");
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Run the whole pipeline with a parallelism of 1.
        environment.setParallelism(1);

        // Input: records produced by the custom source.
        DataStream<RealAlarm> inputStream = environment.addSource(makeRealAlarmData);

        // Processing: identity mapping, each record is forwarded unchanged.
        DataStream<RealAlarm> mapStream = inputStream.map(new MapFunction<RealAlarm, RealAlarm>() {
            @Override
            public RealAlarm map(RealAlarm realAlarm) throws Exception {
                return realAlarm;
            }
        });

        // Output: persist every record through the JDBC sink.
        mapStream.addSink(myJdbcSink);
        // To print to the console instead, call inputStream.print() here.

        environment.execute("real time data source");
    }
}