增量聚合数据与全量聚合数据的区别
增量聚合数据:占用更小的空间、可以高效地进行聚合
全量聚合数据:先攒起来,窗口触发时再统一处理
案例1:keyed的CountWindow的使用【增量聚合】
直接代码:
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
/**
*keyed的CountWindow的使用【增量聚合】--【无界流】
* 先keyed,再划分count窗口
* 触发条件:组内(同一个key)的数据必须满5条才触发,否则不触发
*/
public class CountWindowDemo {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the Flink web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Unbounded socket source producing lines of the form "word,count".
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);

        // Turn each incoming line into a (word, count) tuple.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndCount =
                lines.map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String value) throws Exception {
                        String[] parts = value.split(",");
                        return Tuple2.of(parts[0], Integer.parseInt(parts[1]));
                    }
                });

        // Group the stream by word so each key gets its own window.
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndCount.keyBy(record -> record.f0);

        // Count window: fires only once 5 records have arrived for the same key.
        WindowedStream<Tuple2<String, Integer>, String, GlobalWindow> window = keyed.countWindow(5);

        // Built-in incremental sum over the count field (tuple index 1), printed to stdout.
        window.sum(1).print();

        env.execute();
    }
}
操作实现:在 nc -lk 8888 窗口下随便输入5条不同key的数据【发现控制台窗口不会触发】,直到同一个key的消息累计达到5条才会触发,比如:
注意:它只会统计出现满5次的消息;窗口触发后重新计数,之后重复发送的消息不会额外计入已触发的窗口,比如:
查看job:http://localhost:8081/#/job/b91ef95206512a7ded9795ee58acca2c/overview
案例2:keyed的CountWindow配合reduce的使用【增量聚合】
增量聚合的好处:占用更小的空间、可以高效地进行聚合
直接代码:
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
/**
*
*keyed的CountWindow的使用【增量聚合】--【无界流】
* 先keyed,再划分count窗口
* 触发条件:组内(同一个key)的数据必须满5条才触发,否则不触发
*/
public class CountWindowDemo_02 {

    public static void main(String[] args) throws Exception {
        // Local execution environment with the Flink web UI enabled.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());

        // Unbounded socket source producing lines of the form "word,count".
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);

        // Parse each line into a (word, count) tuple.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndCount =
                lines.map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String value) throws Exception {
                        String[] fields = value.split(",");
                        return Tuple2.of(fields[0], Integer.parseInt(fields[1]));
                    }
                });

        // Key the stream by word so each key gets its own count window.
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndCount.keyBy(t -> t.f0);

        // Count window: fires only once 5 records have arrived for the same key.
        WindowedStream<Tuple2<String, Integer>, String, GlobalWindow> window = keyed.countWindow(5);

        // Incremental aggregation: each record is folded into the running result as
        // it arrives, so only one value per key is kept in window state.
        window.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            /**
             * Combines the running aggregate with the next record of the same key.
             *
             * @param value1 the accumulated result so far (the first record of the window
             *               on the first invocation)
             * @param value2 the next record with the same key
             * @return a new tuple holding the key and the summed count
             * @throws Exception declared by the Flink interface
             */
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1,
                                                  Tuple2<String, Integer> value2) throws Exception {
                // Return a fresh tuple instead of mutating value1 in place: mutating
                // input records is unsafe if object reuse is enabled on the environment
                // (ExecutionConfig.enableObjectReuse). Output is identical either way.
                return Tuple2.of(value1.f0, value1.f1 + value2.f1);
            }
        }).print();

        env.execute();
    }
}