Flink编程模型
分层API
1、ProcessFunction
/**
 * Low-level {@code ProcessFunction} example: keeps a running element count in
 * Flink-managed state and emits {@code "Input: <value>, Count: <n>"} for every
 * incoming element.
 *
 * <p>NOTE: the counter is keyed state obtained via {@code getRuntimeContext().getState(...)},
 * so this function is expected to run on a keyed stream (i.e. after {@code keyBy});
 * the count is then maintained per key.
 */
public class MyProcessFunction extends ProcessFunction<String, String> {

    /**
     * Handle to the per-key counter. It is created in {@link #open}, not in the
     * constructor, and is marked {@code transient} because the function object
     * itself is serialized when the job is deployed; the handle must not be
     * part of that serialized form.
     */
    private transient ValueState<Integer> count;

    /**
     * Registers the {@code "count"} state with the runtime and stores its handle.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Integer> descriptor = new ValueStateDescriptor<>("count", Integer.class);
        count = getRuntimeContext().getState(descriptor);
    }

    /**
     * Increments the stored count and emits it together with the input value.
     *
     * @param value the incoming element
     * @param ctx   per-element context (unused here)
     * @param out   collector receiving the formatted output string
     */
    @Override
    public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
        // value() returns null until the state has been written for the first time.
        Integer previous = count.value();
        int currentCount = (previous == null ? 0 : previous) + 1;
        count.update(currentCount);
        out.collect("Input: " + value + ", Count: " + currentCount);
    }
}
2、DataStream API
// Streaming word count: read lines from the Kafka topic "input-topic", split each
// line into words, and keep a running count per word.
DataStream<String> input = env.addSource(
        new FlinkKafkaConsumer<>("input-topic", new SimpleStringSchema(), properties));
DataStream<Tuple2<String, Integer>> result = input
        // An anonymous class (rather than a lambda) keeps the Tuple2 type arguments
        // visible to Flink's type extraction.
        .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                // Split on runs of whitespace and drop empty tokens, so that repeated,
                // leading or trailing whitespace does not produce "" as a counted word.
                for (String word : value.split("\\s+")) {
                    if (!word.isEmpty()) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            }
        })
        // Key by the word (tuple field f0) through a KeySelector; the positional
        // keyBy(int) overload is deprecated.
        .keyBy(wordCount -> wordCount.f0)
        // Running sum over tuple field 1 (the per-word count).
        .sum(1);
result.print();
3、SQL & Table API
// DataStream <-> Table conversions require the bridging environment
// org.apache.flink.table.api.bridge.java.StreamTableEnvironment; the plain
// TableEnvironment.create(...) expects EnvironmentSettings, not a
// StreamExecutionEnvironment, and has no DataStream conversion methods.
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// Register the stream as a temporary view named "input" with columns (word, frequency).
DataStreamSource<Tuple2<String, Integer>> input = env.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2));
tableEnv.createTemporaryView("input", input, $("word"), $("frequency"));
// Execute a SQL query. The table name is back-quoted because INPUT is on
// Flink SQL's keyword list and may otherwise be rejected by the parser.
Table result = tableEnv.sqlQuery("SELECT word, SUM(frequency) as frequency FROM `input` GROUP BY word");
// A grouped aggregate updates earlier results, so convert to a retract stream:
// the Boolean flag is true for an added row and false for a retracted one.
DataStream<Tuple2<Boolean, Row>> resultStream = tableEnv.toRetractStream(result, Row.class);
resultStream.print();