使用 Flink 窗口对来自消息队列(MQ)数据源的数据进行过滤。
需求:每位老师的基本属性包括编号、名字、年龄、学生集合以及图片信息(字节数组)。需要通过 Flink 窗口实现多字段过滤(按编号和名字分组),并在每个窗口内取年龄最大的那条数据。
- 定义student类
/**
 * Simple mutable bean describing a student by name.
 *
 * <p>Keeps a public no-argument constructor plus getter/setter pair so that
 * bean-style frameworks can instantiate and populate it.
 */
public class Student {
    private String name;

    /** Creates a student with no name set ({@code name} is {@code null}). */
    public Student() {
    }

    /**
     * Creates a student with the given name.
     *
     * @param name the student's name; may be {@code null}
     */
    public Student(String name) {
        this.name = name;
    }

    /**
     * @return the student's name, or {@code null} if none has been set
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the new name; may be {@code null}
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * Returns a readable representation so that printed records show the
     * student's name instead of the default identity hash.
     */
    @Override
    public String toString() {
        return "Student{name='" + name + "'}";
    }
}
- 定义mq数据源
/**
 * Flink parallel source that subscribes to the {@code flink-test-demo} topic
 * (tag {@code test}) and emits one {@code Tuple5} per received message:
 * (no, name, age, students, bytes).
 *
 * <p>The first three fields come from the JSON message body parsed into
 * {@code MqData}; the student list and the byte array are fixed demo values.
 */
public class RocketMqTuple5Source extends RichParallelSourceFunction<Tuple5<String, String, Integer, List<Student>, byte[]>> {

    /** Created in {@link #open}; transient so it is never part of the serialized function. */
    private transient Consumer consumer;

    /** Cleared by {@link #cancel()} so that {@link #run} leaves its wait loop. */
    private volatile boolean running = true;

    /**
     * Builds the MQ consumer from the project configuration.
     *
     * @param parameters Flink configuration passed to the rich function
     * @throws Exception if the parent {@code open} fails
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Properties properties = ConfigUtil.getRocketMqConfiguration();
        properties.put(PropertyKeyConst.GROUP_ID, "leo-test-flink" + ConfigUtil.getDomainConfig());
        consumer = ONSFactory.createConsumer(properties);
    }

    /**
     * Shuts the consumer down and then closes the parent function.
     *
     * @throws Exception if the parent {@code close} fails
     */
    @Override
    public void close() throws Exception {
        try {
            cancel();
        } finally {
            super.close();
        }
    }

    /**
     * Subscribes to the topic, starts the consumer and blocks until the
     * consumer stops, the source is cancelled, or the thread is interrupted.
     *
     * @param sourceContext context used to emit the tuples
     */
    @Override
    public void run(final SourceContext<Tuple5<String, String, Integer, List<Student>, byte[]>> sourceContext) {
        consumer.subscribe("flink-test-demo",
                "test",
                new MessageListener() {
                    @Override
                    public Action consume(Message message, ConsumeContext context) {
                        try {
                            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
                            String body = new String(message.getBody(), java.nio.charset.StandardCharsets.UTF_8);
                            MqData mqData = JSON.parseObject(body, MqData.class);

                            // Fixed demo payload for the student list and the "picture" bytes.
                            List<Student> students = new ArrayList<>();
                            students.add(new Student("def"));
                            // The narrowing casts wrap around: (byte) 456 == -56 and (byte) 457 == -55.
                            byte[] bys = {(byte) 456, (byte) 457};

                            Tuple5<String, String, Integer, List<Student>, byte[]> tuple = new Tuple5<>(
                                    mqData.getNo(), mqData.getName(), mqData.getAge(), students, bys);

                            // This callback runs on the MQ client's thread, not the source thread,
                            // so emission must hold the checkpoint lock.
                            synchronized (sourceContext.getCheckpointLock()) {
                                sourceContext.collect(tuple);
                            }
                        } catch (Exception e) {
                            // Best effort: a message that cannot be parsed or emitted is reported
                            // and still committed, so it is not redelivered.
                            e.printStackTrace();
                        }
                        return Action.CommitMessage;
                    }
                });
        consumer.start();
        System.out.println("=================consumer start");

        // Keep run() alive while the listener delivers messages.
        while (running && consumer.isStarted()) {
            try {
                Thread.sleep(50);
            } catch (InterruptedException e) {
                // Restore the flag and leave the loop; continuing would make every
                // subsequent sleep() throw immediately and spin.
                Thread.currentThread().interrupt();
                break;
            }
        }
    }

    /** Stops the wait loop in {@link #run} and shuts the consumer down if it was created. */
    @Override
    public void cancel() {
        running = false;
        if (consumer != null) {
            consumer.shutdown();
        }
    }
}
- flink执行类
/**
 * Flink job that reads teacher records from {@link RocketMqTuple5Source},
 * passes them through a Table API query, and then, per (no, name) key and
 * 10-second window, prints the record with the largest age.
 */
public class MqDataSourceTuple5Test {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, bsSettings);

        // No keyBy is needed before registering the stream as a table: the stream is
        // keyed again below, after the conversion back from the Table API.
        DataStream<Tuple5<String, String, Integer, List<Student>, byte[]>> text =
                env.addSource(new RocketMqTuple5Source());
        tableEnv.registerDataStream("mqDataTable", text, "nob, nameb, ageb, students, bys");

        Table stageTable = tableEnv.sqlQuery("SELECT nob, nameb, mqDataTable.ageb, students, bys FROM mqDataTable");

        TypeInformation<Tuple5<String, String, Integer, List<Student>, byte[]>> tupleType =
                TypeInformation.of(new TypeHint<Tuple5<String, String, Integer, List<Student>, byte[]>>() {});
        DataStream<Tuple5<String, String, Integer, List<Student>, byte[]>> stream1 =
                tableEnv.toAppendStream(stageTable, tupleType);

        DataStream<Row> keyByStream = stream1
                // Group by number (f0) and name (f1).
                .keyBy(0, 1)
                .timeWindow(Time.seconds(10))
                // Keep the element with the largest age (f2) per key and window. The aggregation
                // keeps the input type, so no explicit returns(...) is required here.
                .maxBy(2)
                .flatMap(new FlatMapFunction<Tuple5<String, String, Integer, List<Student>, byte[]>, Row>() {
                    @Override
                    public void flatMap(Tuple5<String, String, Integer, List<Student>, byte[]> value, Collector<Row> out) throws Exception {
                        // Copy the five tuple fields into a Row, position by position.
                        Row row = new Row(5);
                        row.setField(0, value.f0);
                        row.setField(1, value.f1);
                        row.setField(2, value.f2);
                        row.setField(3, value.f3);
                        row.setField(4, value.f4);
                        out.collect(row);
                    }
                })
                // The last field is a byte[]; declare it as a primitive byte array rather
                // than a generic type over the primitive byte class.
                .returns(Types.ROW(Types.STRING, Types.STRING, Types.INT, Types.GENERIC(List.class), Types.PRIMITIVE_ARRAY(Types.BYTE)));

        keyByStream.print();
        env.execute();
    }
}
- 通过mq控制台发送测试数据
- 控制台输出结果
以上写法仅代表我个人对 Flink 的理解,如有更好的写法,欢迎指出和分享!