package com.cnic.flink.dataset.coding
import org.apache.flink.api.java.aggregation.Aggregations
import org.apache.flink.api.scala.{AggregateDataSet, DataSet, ExecutionEnvironment}
import org.apache.flink.streaming.api.scala._
import scala.collection.mutable
/**
 * Minimal Flink DataSet example of a grouped aggregation on tuples.
 *
 * Builds a small in-memory data set of `(Int, String, Double)` tuples, groups
 * it by the tuple field at index 1 and applies `Aggregations.MAX` to the field
 * at index 2, then prints the result.
 */
object AggregateDemo {

  /**
   * Runs the demo job and prints the aggregated data set to standard output.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // The records are fixed, so an immutable Seq is sufficient; no mutable
    // buffer is needed to assemble them.
    val data: Seq[(Int, String, Double)] = Seq(
      (1, "yuwen", 89.0),
      (2, "shuxue", 92.2),
      (3, "yuwen", 89.99)
    )

    // Build the data source
    val input: DataSet[(Int, String, Double)] = env.fromCollection(data)

    // Group by tuple field 1 (the String) and take the maximum of field 2
    // (the Double) within each group. Grouping uses a field position rather
    // than a key-selector function.
    val value: AggregateDataSet[(Int, String, Double)] =
      input.groupBy(1).aggregate(Aggregations.MAX, 2)

    value.print()
  }
}
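/*
 * Notes:
 * 1. Aggregate can only be applied to tuple data sets.
 * 2. When using aggregate, grouping must be done by field index or field name,
 *    e.g. groupBy(0); otherwise the following error is raised:
 *    Exception in thread "main" java.lang.UnsupportedOperationException: Aggregate does not support grouping with KeySelector functions, yet.
 */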