简介:在常规的业务开发中,SQL、Table API、DataStream API 比较常用,处于 Low-level 的 ProcessFunction 相对用得较少。
本文通过实战来熟悉处理函数(Process Function)。
官方针对最底层 API 提供了 process 方法,调用时需要传入一个 ProcessFunction 的实现。
/**
 * Streaming word count implemented with Flink's low-level process-function API.
 *
 * <p>The job reads text lines from a socket, splits every non-blank line into lower-cased
 * words, keys the stream by word and keeps a running count per word in keyed
 * {@code ValueState}. For every incoming word the updated count is emitted as
 * {@code "<word> = <count>"} and printed to standard error.
 *
 * <p>Reference:
 * https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/stream/operators/process_function.html
 *
 * <p>Created 2021/3/10.
 *
 * @author liu a fu
 * @version 1.0
 */
public class StreamProcessDemo {

    /**
     * Builds the word-count pipeline and submits it for execution.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // 1 - Execution environment, with the job parallelism set to 1.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2 - Source: text lines read from a socket.
        DataStreamSource<String> inputProcessing = env.socketTextStream("node1.itcast.cn", 9999);

        // 3 - Transformation: drop null/blank lines, then emit (word, 1) for every word.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordsDataStream = inputProcessing
                .filter(line -> line != null && line.trim().length() > 0)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Locale.ROOT keeps lower-casing independent of the JVM default locale
                        // (e.g. a Turkish locale would map "I" to a dotless "ı").
                        for (String word : line.trim().toLowerCase(java.util.Locale.ROOT).split("\\s+")) {
                            out.collect(Tuple2.of(word, 1));
                        }
                    }
                });

        // Key the stream by the word itself (typed key selector instead of the positional
        // keyBy(0)), then aggregate per key inside a KeyedProcessFunction.
        SingleOutputStreamOperator<String> wordCounts = wordsDataStream
                .keyBy(wordAndOne -> wordAndOne.f0)
                .process(new KeyedProcessFunction<String, Tuple2<String, Integer>, String>() {
                    // Running count for the current key; obtained from the runtime context in open().
                    private transient ValueState<Integer> countState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        // Register/obtain the keyed state handle.
                        countState = getRuntimeContext().getState(
                                new ValueStateDescriptor<Integer>("countState", Integer.class));
                    }

                    @Override
                    public void processElement(Tuple2<String, Integer> value,
                                               Context ctx,
                                               Collector<String> out) throws Exception {
                        // Count carried by the element being processed.
                        int increment = value.f1;
                        // Previously stored count; null means this key has no state yet.
                        Integer previous = countState.value();
                        int updated = (previous == null) ? increment : previous + increment;

                        // Write the state once and emit the value we just computed instead of
                        // reading the state back a second time.
                        countState.update(updated);
                        out.collect(value.f0 + " = " + updated);
                    }
                });

        // 4 - Sink: print the results to standard error.
        wordCounts.printToErr();

        // 5 - Trigger execution, using the class name as the job name.
        env.execute(StreamProcessDemo.class.getSimpleName());
    }
}