package com.atguigu.Adatastream_api.transformations;
import com.atguigu.Fbeans.SensorReading;
import org.apache.flink.api.common.functions.RichReduceFunction;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates Flink's rolling (incremental) aggregation operators:
 * {@code max}/{@code min}/{@code sum}/{@code maxBy}/{@code minBy}/{@code reduce}.
 *
 * <p>These operators are only available after {@code keyBy}, because they are defined
 * on {@link KeyedStream} rather than on a plain {@link DataStream}.
 *
 * <ul>
 *   <li>{@code min}/{@code max}: keeps the first record seen for the key and only
 *       overwrites the aggregated field of that record when a later record of the same
 *       key carries a smaller/larger value; the other fields stay as in the first record.</li>
 *   <li>{@code minBy}/{@code maxBy}: emits the complete record that holds the extreme
 *       value for the key.</li>
 *   <li>{@code sum}: keeps the first record seen for the key and stores the running total
 *       in the aggregated field of that record.</li>
 *   <li>{@code reduce}: the most general form; the user function may combine the previous
 *       result and the new record in any way to produce the next result.</li>
 * </ul>
 */
public class RollingAggregateText {

    /** Input file that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt";

    /**
     * Builds and runs the demo job: reads sensor records from a text file, keys them by
     * sensor id, applies {@code max}, {@code maxBy}, {@code sum} and a custom
     * {@code reduce}, and prints every result stream.
     *
     * @param args optional; {@code args[0]} overrides the input file path, otherwise
     *             {@link #DEFAULT_INPUT_PATH} is used
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // Kept from the original job. No windows or timers are used below.
        // NOTE(review): this setter is deprecated in newer Flink releases - confirm the
        // project's Flink version before removing it.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Read the raw lines and wrap each one in a SensorReading POJO.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        DataStreamSource<String> inputStream = env.readTextFile(inputPath);
        DataStream<SensorReading> dataStream = inputStream.map(line -> parseSensorReading(line));

        /*
         * Key the stream.
         * keyBy(SensorReading::getId) and keyBy(data -> data.getId()) produce a key of the
         * id's own type (String here).
         * keyBy("id") produces a Tuple-typed key and is about to be deprecated in newer versions.
         * keyBy(0) can only be used when the elements themselves are tuples.
         */
        KeyedStream<SensorReading, String> keyedStream = dataStream.keyBy(SensorReading::getId);

        DataStream<SensorReading> result1 = keyedStream.max("temperature");
        DataStream<SensorReading> result2 = keyedStream.maxBy("temperature");
        DataStream<SensorReading> result3 = keyedStream.sum("temperature");

        /*
         * The more general reduce operator: keep the timestamp of the newest record and
         * the highest temperature seen so far.
         * s1 is the previously reduced record, s2 is the newly arrived record.
         */
        DataStream<SensorReading> result4 = keyedStream.reduce(new RichReduceFunction<SensorReading>() {
            @Override
            public SensorReading reduce(SensorReading s1, SensorReading s2) throws Exception {
                return new SensorReading(
                        s1.getId(),
                        s2.getTimestamp(),
                        Math.max(s1.getTemperature(), s2.getTemperature()));
            }
        });

        // Print every result stream with a label identifying the operator.
        result1.print("max输出结果");
        result2.print("maxBy输出结果");
        result3.print("sum输出结果");
        result4.print("reduce输出结果");

        env.execute("测试flink的滚动聚合算子");
    }

    /**
     * Parses one comma-separated record of the form {@code id,timestamp,temperature}.
     * Surrounding whitespace of each field is ignored.
     *
     * @param line one line of the input file
     * @return the parsed reading
     * @throws IllegalArgumentException if the line has fewer than three fields
     * @throws NumberFormatException if the timestamp or temperature is not numeric
     */
    private static SensorReading parseSensorReading(String line) {
        String[] fields = line.split(",");
        if (fields.length < 3) {
            throw new IllegalArgumentException(
                    "Malformed sensor record, expected 'id,timestamp,temperature' but got: " + line);
        }
        // valueOf replaces the deprecated new Long(String)/new Double(String) constructors.
        return new SensorReading(
                fields[0].trim(),
                Long.valueOf(fields[1].trim()),
                Double.valueOf(fields[2].trim()));
    }
}
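// Source article: "Flink primer, lesson 4: Flink's incremental aggregation functions min/max/minBy/maxBy/reduce"
// Page footer at capture time: "latest recommended article, published 2023-12-08 17:09:54"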