本文案例为转载,原文地址:https://www.cnblogs.com/maoxiangyi/p/10978028.html
1. ProcessWindowFunction 效率较低:窗口内的全部数据会先被缓存,待窗口触发时一次性传递给算子处理(无法增量聚合),代码如下:
import org.apache.flink.api.common.functions.AggregateFunction import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.functions.source.SourceFunction import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction import org.apache.flink.streaming.api.windowing.time.Time import org.apache.flink.streaming.api.windowing.windows.TimeWindow import org.apache.flink.util.Collector /** * @author: create by maoxiangyi * @version: v1.0 * @description: window * @date:2019 /6/4 */ object ProcessWordCount { def main(args: Array[String]): Unit = { //设置环境 val env: StreamExecutionEnvironment = StreamExecutionEnvironment.createLocalEnvironment() //设置数据源 env.addSource(new SourceFunction[String] { override def run(ctx: SourceFunction.SourceContext[String]): Unit = { while (true) { ctx.collect("hello hadoop hello storm hello spark") Thread.sleep(1000) } } override def cancel(): Unit = {} }) //计算逻辑 .flatMap(_.split(" ")) .map((_, 1)) .keyBy(_._1) .timeWindow(Time.seconds(10), Time.seconds(10)).process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] { override def process(key: String, context: Context, elements: Iterable[(String, Int)], out: Collector[(String, Int)]): Unit = {