Flink DataSet定制API详解(Scala版) -003
Reduce
以element为粒度,对element进行合并操作。最后只能形成一个结果。
执行程序:
package code.book.batch.dataset.advance.api
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
/**
 * Demonstrates `DataSet.reduce` by applying four different `ReduceFunction`
 * implementations to the same data set of the integers 1 through 7.
 */
object ReduceFunction001scala {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    val numbers = env.fromElements(1, 2, 3, 4, 5, 6, 7)

    // Adds every element to the running result.
    val addAll: ReduceFunction[Int] = new ReduceFunction[Int] {
      override def reduce(acc: Int, elem: Int): Int = acc + elem
    }
    numbers.reduce(addAll).print()

    // Multiplies every element into the running result.
    val multiplyAll: ReduceFunction[Int] = new ReduceFunction[Int] {
      override def reduce(acc: Int, elem: Int): Int = acc * elem
    }
    numbers.reduce(multiplyAll).print()

    // Adds when the running result is even, multiplies when it is odd.
    // NOTE(review): this combine step is not associative, so its result depends
    // on the order in which elements are combined.
    val addOrMultiply: ReduceFunction[Int] = new ReduceFunction[Int] {
      override def reduce(acc: Int, elem: Int): Int =
        if (acc % 2 == 0) acc + elem else acc * elem
    }
    numbers.reduce(addOrMultiply).print()

    // Same as the sum, but logs both arguments of every reduce call.
    val tracedSum: ReduceFunction[Int] = new ReduceFunction[Int] {
      override def reduce(acc: Int, elem: Int): Int = {
        println(s"intermediateResult=$acc ,next=$elem")
        acc + elem
      }
    }
    // The collected value is discarded; collect() is called here instead of
    // print(), so only the trace lines above are printed.
    numbers.reduce(tracedSum).collect()
  }
}
执行结果:
text2.print()
28
text3.print()
5040
text4.print()
157
text5.collect()
intermediateResult=1 ,next=2
intermediateResult=3 ,next=3
intermediateResult=6 ,next=4
intermediateResult=10 ,next=5
intermediateResult=15 ,next=6
intermediateResult=21 ,next=7
reduceGroup
对每一组的元素分别进行合并操作。与reduce类似,不过它能为每一组产生一个结果。
如果没有分组,就当作一个分组,此时和reduce一样,只会产生一个结果。
执行程序:
package code.book.batch.dataset.advance.api
import java.lang.Iterable
import org.apache.flink.api.common.functions.GroupReduceFunction
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
import org.apache.flink.util.Collector
/**
 * Demonstrates `DataSet.reduceGroup` with `GroupReduceFunction`: twice on an
 * ungrouped data set of integers, and once on a data set of (name, amount)
 * tuples grouped by the first tuple field.
 */
object GroupReduceFunction001scala {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    val text = env.fromElements(1, 2, 3, 4, 5, 6, 7)

    // No groupBy: the function emits a single value, the sum of the elements
    // it is handed.
    val text2 = text.reduceGroup(new GroupReduceFunction[Int, Int] {
      override def reduce(iterable: Iterable[Int], collector: Collector[Int]): Unit = {
        var sum = 0
        val itor = iterable.iterator()
        while (itor.hasNext) {
          sum += itor.next()
        }
        collector.collect(sum)
      }
    })
    text2.print()

    // No groupBy: emits one tuple (sum of even values, sum of odd values).
    val text3 = text.reduceGroup(new GroupReduceFunction[Int, (Int, Int)] {
      override def reduce(iterable: Iterable[Int], collector: Collector[(Int, Int)]): Unit = {
        var sum0 = 0 // running sum of even values
        var sum1 = 0 // running sum of odd values
        val itor = iterable.iterator()
        while (itor.hasNext) {
          val v = itor.next()
          if (v % 2 == 0) {
            sum0 += v
          } else {
            sum1 += v
          }
        }
        // Pass an explicit tuple: collect takes one argument, and relying on
        // the compiler to auto-tuple two arguments triggers a lint warning.
        collector.collect((sum0, sum1))
      }
    })
    text3.print()

    val data = env.fromElements(
      ("zhangsan", 1000), ("lisi", 1001), ("zhangsan", 3000), ("lisi", 1002))

    // Grouped by tuple field 0 (the name): emits one (name, total) per group.
    val data2 = data.groupBy(0).reduceGroup(
      new GroupReduceFunction[(String, Int), (String, Int)] {
        override def reduce(
            iterable: Iterable[(String, Int)],
            collector: Collector[(String, Int)]): Unit = {
          var salary = 0
          var name = ""
          val itor = iterable.iterator()
          while (itor.hasNext) {
            val t = itor.next()
            // The name of the last element seen is the one emitted; the data
            // set is grouped on this field.
            name = t._1
            salary += t._2
          }
          // Explicit tuple instead of relying on argument auto-tupling.
          collector.collect((name, salary))
        }
      })
    data2.print()
  }
}
执行结果:
text2.print()
28
text3.print()
(12,16)
data2.print
(lisi,2003)
(zhangsan,4000)