Flink窗口函数ReduceFunction、AggregateFunction、ProcessWindowFunction实例

1、ReduceFunction

增量聚合:输入、状态、输出类型必须相同

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

import java.time.Duration;
import java.util.Random;

public class ReduceFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
executionEnvironment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        executionEnvironment.getConfig().setAutoWatermarkInterval(100);

        DataStreamSource<Tuple2<String, Long>> streamSource = executionEnvironment.addSource(new SourceFunction<Tuple2<String, Long>>() {
            boolean flag = true;

            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                while (flag) {
                    Thread.sleep(1000);
                    int i = new Random().nextInt(4);
                    sourceContext.collect(new Tuple2<String, Long>(str[i], System.currentTimeMillis()));
                }
            }

            @Override
            public void cancel() {
                flag = false;
            }
        });

        streamSource.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String,Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
            @Override
            public long extractTimestamp(Tuple2<String, Long> stringLongTuple2, long l) {
                return stringLongTuple2.f1;
            }
        })).map(new MapFunction<Tuple2<String, Long>, Tuple3<String, Long, Integer>>() {
            @Override
            public Tuple3<String, Long, Integer> map(Tuple2<String, Long> stringLongTuple2) throws Exception {
                System.out.println(stringLongTuple2.f0 + stringLongTuple2.f1);
                return new Tuple3<String, Long, Integer>(stringLongTuple2.f0,stringLongTuple2.f1,1);
            }
        }).keyBy(new KeySelector<Tuple3<String, Long, Integer>, String>() {
            @Override
            public String getKey(Tuple3<String, Long, Integer> stringIntegerTuple2) throws Exception {
                return stringIntegerTuple2.f0;
            }
        }).window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .reduce(new ReduceFunction<Tuple3<String, Long, Integer>>() {
                    @Override
                    public Tuple3<String, Long, Integer> reduce(Tuple3<String, Long, Integer> stringIntegerTuple2, Tuple3<String, Long, Integer> t1) throws Exception {
                        return new Tuple3<String, Long, Integer>(stringIntegerTuple2.f0,stringIntegerTuple2.f1,stringIntegerTuple2.f2 + t1.f2);
                    }
                }).print();


        executionEnvironment.execute("reduce test");

    }
}

2、AggregateFunction

增量聚合:输入、状态(累加器)、输出类型可以不同

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

import java.time.Duration;
import java.util.Random;

/**
 * Demonstrates incremental window aggregation with an {@code AggregateFunction}.
 *
 * <p>A synthetic source emits one (name, timestamp) record per second. The records are mapped to
 * (name, 1), keyed by name, assigned to 10-second tumbling event-time windows, and counted per
 * key. In this example the input, accumulator and output types happen to be the same
 * {@code Tuple2<String, Integer>}.
 */
public class AggregateFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setAutoWatermarkInterval(100);

        DataStreamSource<Tuple2<String, Long>> streamSource = env.addSource(new SourceFunction<Tuple2<String, Long>>() {
            // volatile so that the write performed by cancel() (called from another thread)
            // becomes visible to the loop in run().
            private volatile boolean flag = true;

            /** Emits a randomly chosen name together with the current wall-clock time once per second. */
            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                // One generator for the whole run instead of a new instance per record.
                Random random = new Random();
                while (flag) {
                    Thread.sleep(1000);
                    int i = random.nextInt(str.length);
                    sourceContext.collect(new Tuple2<>(str[i], System.currentTimeMillis()));
                }
            }

            /** Asks the emit loop in run() to stop. */
            @Override
            public void cancel() {
                flag = false;
            }
        });

        streamSource
                // Use the record's own timestamp (f1) as event time; tolerate 1 second of out-of-orderness.
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                                return element.f1;
                            }
                        }))
                // Replace the timestamp with a count of 1 so the aggregate can sum the counts.
                .map(new MapFunction<Tuple2<String, Long>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(Tuple2<String, Long> element) throws Exception {
                        return new Tuple2<>(element.f0, 1);
                    }
                })
                // Key by name (f0).
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> element) throws Exception {
                        return element.f0;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .aggregate(new AggregateFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {

                    /** Creates the intermediate state: an empty name and a count of zero. */
                    @Override
                    public Tuple2<String, Integer> createAccumulator() {
                        return new Tuple2<>("", 0);
                    }

                    /** Folds one incoming element into the accumulator: takes the element's name and adds its count. */
                    @Override
                    public Tuple2<String, Integer> add(Tuple2<String, Integer> value, Tuple2<String, Integer> accumulator) {
                        return new Tuple2<>(value.f0, value.f1 + accumulator.f1);
                    }

                    /** Returns the accumulator unchanged as the final result. */
                    @Override
                    public Tuple2<String, Integer> getResult(Tuple2<String, Integer> accumulator) {
                        return accumulator;
                    }

                    /**
                     * Combines two accumulators. Needed for merging windows such as session
                     * windows, where two windows may be merged into one. Previously this
                     * returned null, which would lose the state on a merge.
                     */
                    @Override
                    public Tuple2<String, Integer> merge(Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
                        // A fresh accumulator carries an empty name; prefer whichever side has a real one.
                        String name = a.f0.isEmpty() ? b.f0 : a.f0;
                        return new Tuple2<>(name, a.f1 + b.f1);
                    }
                })
                .print();

        env.execute("aggregate test");
    }
}

3、ProcessWindowFunction

窗口数据全量计算,输入、输出类型可以不同

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.time.Duration;
import java.util.Random;

/**
 * Demonstrates full-window computation with a {@code ProcessWindowFunction}.
 *
 * <p>A synthetic source emits one (name, timestamp) record per second. The records are mapped to
 * (name, 1), keyed by name, and assigned to 10-second tumbling event-time windows. The process
 * function receives all elements of a window at once and emits a single (name, count) result.
 */
public class ProcessFunctionTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setAutoWatermarkInterval(100);

        DataStreamSource<Tuple2<String, Long>> streamSource = env.addSource(new SourceFunction<Tuple2<String, Long>>() {
            // volatile so that the write performed by cancel() (called from another thread)
            // becomes visible to the loop in run().
            private volatile boolean flag = true;

            /** Emits a randomly chosen name together with the current wall-clock time once per second. */
            @Override
            public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
                String[] str = {"韩梅梅", "张三", "王五", "李四"};
                // One generator for the whole run instead of a new instance per record.
                Random random = new Random();
                while (flag) {
                    Thread.sleep(1000);
                    int i = random.nextInt(str.length);
                    sourceContext.collect(new Tuple2<>(str[i], System.currentTimeMillis()));
                }
            }

            /** Asks the emit loop in run() to stop. */
            @Override
            public void cancel() {
                flag = false;
            }
        });

        streamSource
                // Use the record's own timestamp (f1) as event time; tolerate 1 second of out-of-orderness.
                .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                                return element.f1;
                            }
                        }))
                // Replace the timestamp with a count of 1 so the window function can sum the counts.
                .map(new MapFunction<Tuple2<String, Long>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(Tuple2<String, Long> element) throws Exception {
                        return new Tuple2<>(element.f0, 1);
                    }
                })
                // Key by name (f0).
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> element) throws Exception {
                        return element.f0;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                // Type parameters: input, output, key, window type.
                .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {

                    // Arguments: key, context, all elements of the window, output collector.
                    @Override
                    public void process(String key, Context context, Iterable<Tuple2<String, Integer>> elements, Collector<Tuple2<String, Integer>> out) throws Exception {
                        int count = 0;
                        for (Tuple2<String, Integer> value : elements) {
                            count += value.f1;
                        }
                        out.collect(new Tuple2<>(key, count));
                    }
                })
                .print();

        env.execute("process test");
    }
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Flink是一个流式处理框架,窗口是其核心功能之一,用于对数据流进行分组和聚合操作。窗口可以将无限的数据流划分为有限大小的片段,并在每个窗口上执行计算操作。 Flink提供了多种类型的窗口(窗口分配器),包括滚动窗口、滑动窗口和会话窗口。下面是对这些窗口类型的介绍: 1. 滚动窗口(Tumbling Window):滚动窗口将数据流划分为固定大小的、不重叠的窗口。每个窗口的长度固定(时间窗口按时间长度,计数窗口按元素个数),并且每个元素只属于一个窗口。滚动窗口适用于对数据流进行离散化处理,例如按小时、按天或按月进行统计。 2. 滑动窗口(Sliding Window):滑动窗口将数据流划分为固定大小的、可能重叠的窗口。与滚动窗口不同,滑动窗口中的元素可以属于多个窗口。滑动窗口适用于需要考虑数据流中相邻元素之间关系的计算,例如每隔五分钟计算最近一小时内的平均值。 3. 会话窗口(Session Window):会话窗口根据数据流中的活动时间间隔来划分窗口。当数据流中的元素之间的时间间隔超过指定的阈值时,会话窗口会关闭并输出结果。会话窗口适用于处理具有不确定时间间隔的事件流,例如用户在网站上的访问记录。 在Flink中,可以使用窗口函数对窗口中的数据进行聚合操作,例如求和、计数、平均值等。窗口函数可以通过自定义函数(如ReduceFunction、AggregateFunction、ProcessWindowFunction)或使用预定义的聚合函数来实现。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值