package batch.BatchAPI
import org.apache.flink.api.common.operators.Order
import org.apache.flink.api.scala.ExecutionEnvironment
import scala.collection.mutable.ListBuffer
/**
* @author Jacky
*/
object FirstAndSortDemo {

  /**
   * Demonstrates Flink batch (DataSet API) transformation operators:
   * first-n, sortGroup, sortPartition, and hash/range partitioning.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    // Sample data shared by every demo below (the original built an identical
    // second list for the partitioning demos; one DataSet is enough).
    val list = new ListBuffer[Tuple2[Integer, String]]()
    list.append((1, "张三"))
    list.append((2, "tom"))
    list.append((2, "jakcy"))
    list.append((2, "jack"))
    list.append((4, "haha"))
    list.append((4, "hehe"))
    list.append((4, "tony"))
    val datas = env.fromCollection(list)

    println("==============First-n:获取集合中的前N个元素===================")
    // Group by field 0, then take the first 2 elements of each group
    // ("grouped TOP N").
    datas.groupBy(0).first(2).print()

    println("===========================================")
    // Group by field 0, sort each group by field 1 descending, then take the
    // first 2 elements of each group ("grouped + sorted TOP N").
    datas.groupBy(0).sortGroup(1, Order.DESCENDING).first(2).print()

    println("===========================================")
    // sortPartition only sorts WITHIN each partition, so a truly global sort
    // needs a single partition: force parallelism 1 on the sort operator.
    // Sort by field 0 descending (ties broken by field 1 ascending) and take
    // the first 2 elements.
    datas.sortPartition(0, Order.DESCENDING).sortPartition(1, Order.ASCENDING)
      .setParallelism(1) // one partition => the per-partition sort is global
      .first(2).print()

    println("===========Hash-Partition:根据指定key的哈希值对数据集进行分区=================")
    // Repartition by the hash of field 0 and log which thread handles each
    // element. Map the iterator LAZILY: draining `it` with a while loop and
    // returning the exhausted iterator (as the original did) makes the
    // downstream print() emit nothing.
    datas.partitionByHash(0).mapPartition { it =>
      it.map { data =>
        println("当前线程的ID:" + Thread.currentThread().getId + " " + data)
        data
      }
    }.print()

    println("===========Range-Partition:根据指定的key对数据集进行范围分区=================")
    // Repartition by ranges of field 0 and log which thread handles each
    // element; same lazy pass-through pattern as above.
    datas.partitionByRange(0).mapPartition { it =>
      it.map { data =>
        println("当前线程ID:" + Thread.currentThread().getId + " " + data)
        data
      }
    }.print()
  }
}
// Source article: "Flink中Transformation算子(二)" (Flink Transformation operators, part 2)
// Scraped footer: latest recommended article published 2024-04-12 23:46:39