// 在控制台执行:spark-shell
// ------------------------------------------------------
// 一、元素型 RDD数据
// Element-type RDD: integers 1..10 spread over 3 partitions.
val rdd = sc.parallelize(1 to 10, 3)
// rdd.glom.collect
// => Array(Array(1, 2, 3), Array(4, 5, 6), Array(7, 8, 9, 10))
// A transformation only records the RDD lineage; computation is triggered by an action.
// ------------------------------------------------------
// ====================【transformation operators】====================
// 1. map(f): apply f to each element.
val result01 = rdd.map(t => t + 1)
// result01.collect => Array(2, 3, 4, 5, 6, 7, 8, 9, 10, 11)

// 2. mapPartitions(f): apply f once per partition (f receives the partition's Iterator).
val result02 = rdd.mapPartitions(it => it.map(s => s + 1))
// result02.collect => Array(2, 3, 4, 5, 6, 7, 8, 9, 10, 11)

// 5. filter(p): keep elements satisfying p.
// Defined BEFORE result03, which uses it (the original transcript referenced
// result05 before it existed, which would fail in a real spark-shell session).
// NOTE(review): the original line was truncated after "t % 2"; "== 0" (keep evens)
// is reconstructed from the flatMap output shown below for result03.
val result05 = rdd.filter(t => t % 2 == 0)
// result05.collect => Array(2, 4, 6, 8, 10)

// 3. flatMap(f): map + flatten.
val result03 = result05.flatMap(t => t to 10)
// result03.collect
// => Array(2, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 8, 9, 10, 10)
//    i.e. (2 to 10) ++ (4 to 10) ++ (6 to 10) ++ (8 to 10) ++ (10 to 10)

// 4. glom: gather each partition's elements into an Array.
val result04 = rdd.glom
// result04.collect => Array(Array(1, 2, 3), Array(4, 5, 6), Array(7, 8, 9, 10))
// Spark-Shell 常用算子练习 (Spark-Shell common operator exercises)
// 最新推荐文章于 2021-08-10 16:04:25 发布 (blog publication metadata; not code)