text2.print();
FLINK VS SPARK--##bigdata##
BUFFER VS SHUFFER--##bigdata##
text3.print();
(FLINK VS SPARK,14)
(BUFFER VS SHUFFER,18)
text4.print();
Wc(FLINK VS SPARK,14)
Wc(BUFFER VS SHUFFER,18)
mapPartition
以partition为粒度对element进行转化:每次调用处理一个partition的全部数据,可输出任意数量的结果(不限于1:1)。由于每个partition只调用一次函数,有时候会比map效率高。
执行程序:
package code.book.batch.dataset.advance.api
import java.lang.Iterable
import org.apache.flink.api.common.functions.{MapFunction, MapPartitionFunction}
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
import org.apache.flink.util.Collector
/**
 * Demonstrates `DataSet.mapPartition`: the function is invoked once per
 * partition and may emit any number of records via the [[Collector]].
 */
object MapPartitionFunction001scala {
  def main(args: Array[String]): Unit = {
    // 1. Set up the execution environment and create test data.
    val env = ExecutionEnvironment.getExecutionEnvironment
    val text = env.fromElements("flink vs spark", "buffer vs shuffer")

    // 2. Per partition, count the elements (emits one Long per partition).
    val text2 = text.mapPartition(new MapPartitionFunction[String, Long]() {
      override def mapPartition(iterable: Iterable[String], collector: Collector[Long]): Unit = {
        var count = 0L
        val it = iterable.iterator()
        while (it.hasNext) {
          it.next()
          count += 1
        }
        collector.collect(count)
      }
    })
    text2.print()

    // 3. Per partition, transform each element's content (upper-case + suffix).
    val text3 = text.mapPartition(new MapPartitionFunction[String, String]() {
      override def mapPartition(iterable: Iterable[String], collector: Collector[String]): Unit = {
        val it = iterable.iterator()
        while (it.hasNext) {
          collector.collect(it.next().toUpperCase + "--##bigdata##")
        }
      }
    })
    text3.print()

    // 4. Per partition, map each line to a case class holding the
    //    upper-cased line and its length.
    // 4.1 Define the result type.
    case class Wc(line: String, length: Int)
    // 4.2 Convert each element into a Wc instance.
    val text4 = text.mapPartition(new MapPartitionFunction[String, Wc] {
      override def mapPartition(iterable: Iterable[String], collector: Collector[Wc]): Unit = {
        val it = iterable.iterator()
        while (it.hasNext) {
          val s = it.next()
          collector.collect(Wc(s.toUpperCase, s.length))
        }
      }
    })
    text4.print()
  }
}
执行结果:
text2.print();
2
text3.print();
FLINK VS SPARK--##bigdata##
BUFFER VS SHUFFER--##bigdata##
text4.print();
Wc(FLINK VS SPARK,14)
Wc(BUFFER VS SHUFFER,18)