Spark——RDD算子

http://homepage.cs.latrobe.edu.au/zhe/ZhenHeSparkRDDAPIExamples.html

/\\\\\\


mapPartitionsWithIndex
val rdd1=sc.parallelize(List(1,2,3,4,5,6,7,8,9),2)
/** Labels every element of a partition with the index of that partition.
  *
  * Intended to be passed to `mapPartitionsWithIndex`, so the collected
  * result shows which partition each element lives in.
  *
  * @param index index of the partition being processed
  * @param iter  the elements held by that partition
  * @return a lazy iterator of strings of the form `[partID:<index>, val: <element>]`
  */
def myfunc(index: Int, iter: Iterator[Int]): Iterator[String] = {
  for (value <- iter) yield s"[partID:$index, val: $value]"
}
rdd1.mapPartitionsWithIndex(myfunc).collect
res1: Array[String] = Array([partID:0, val: 1], [partID:0, val: 2], [partID:0, val: 3], [partID:0, val: 4], [partID:1, val: 5], [partID:1, val: 6], [partID:1, val: 7], [partID:1, val: 8], [partID:1, val: 9])

repartition
val rdd2 = rdd1.repartition(3)
rdd2.partitions.length
res5: Int = 3
rdd2.mapPartitionsWithIndex(myfunc).collect
res6: Array[String] = Array([partID:0, val: 3], [partID:0, val: 6], [partID:0, val: 9], [partID:1, val: 1], [partID:1, val: 4], [partID:1, val: 7], [partID:2, val: 2], [partID:2, val: 5], [partID:2, val: 8])

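coalesce
(注:coalesce 默认 shuffle = false,此时不能增加分区数,所以下面的 coalesce(3) 结果仍然只有原来的 2 个分区;传入 shuffle = true 后才会真正得到 3 个分区,效果等同于 repartition(3)。)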
val rdd2 = rdd1.coalesce(3)
rdd2.mapPartitionsWithIndex(myfunc).collect
res7: Array[String] = Array([partID:0, val: 1], [partID:0, val: 2], [partID:0, val: 3], [partID:0, val: 4], [partID:1, val: 5], [partID:1, val: 6], [partID:1, val: 7], [partID:1, val: 8], [partID:1, val: 9])
val rdd2 = rdd1.coalesce(3,true)
rdd2.mapPartitionsWithIndex(myfunc).collect
res8: Array[String] = Array([partID:0, val: 3], [partID:0, val: 6], [partID:0, val: 9], [partID:1, val: 1], [partID:1, val: 4], [partID:1, val: 7], [partID:2, val: 2], [partID:2, val: 5], [partID:2, val: 8])

collectAsMap
val rdd = sc.parallelize(List(("a",1),("b",2)))
rdd.collectAsMap
res9: scala.collection.Map[String,Int] = Map(b -> 2, a -> 1)
rdd.collect
res10: Array[(String, Int)] = Array((a,1), (b,2))

countByKey,countByValue
val rdd1 = sc.parallelize(List(("a",1),("b",2),("b",2),("c",2),("c",2)))
rdd1.count
res11: Long = 5
rdd1.countByKey
res12: scala.collection.Map[String,Long] = Map(a -> 1, b -> 2, c -> 2)
rdd1.countByValue
res13: scala.collection.Map[(String, Int),Long] = Map((c,2) -> 2, (a,1) -> 1, (b,2) -> 2)

filterByRange
val rdd1 = sc.parallelize(List(("a",1),("e",5),("d",4),("c",2),("b",6),("c",3)))
rdd1.filterByRange("b","d").collect
res14: Array[(String, Int)] = Array((d,4), (c,2), (b,6), (c,3))

flatMapValues
val a = sc.parallelize(List(("a","1 2"),("b","3 4")))
a.flatMapValues(_.split(" ")).collect
res15: Array[(String, String)] = Array((a,1), (a,2), (b,3), (b,4))

foldByKey
val rdd1 = sc.parallelize(List("dog","wolf","cat","beer"),2)
rdd1.map(x => (x.length,x)).collect
res17: Array[(Int, String)] = Array((3,dog), (4,wolf), (3,cat), (4,beer))
val rdd2 = rdd1.map(x => (x.length,x))
rdd2.foldByKey("")(_+_).collect
res19: Array[(Int, String)] = Array((4,wolfbeer), (3,dogcat))

keyBy
val rdd1 = sc.parallelize(List("dog","wolf","elephant","salmon"),3)
val rdd2 = rdd1.keyBy(_.length)
rdd2.collect
res21: Array[(Int, String)] = Array((3,dog), (4,wolf), (8,elephant), (6,salmon))
val rdd3 = rdd1.keyBy(_(0))
rdd3.collect
res22: Array[(Char, String)] = Array((d,dog), (w,wolf), (e,elephant), (s,salmon))

/\\\\\\\\

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值