package chen
import org.apache.spark._
object rdd_test {
  // Windows-only workaround so Hadoop can locate winutils.exe under hadoop.home.dir.
  System.setProperty("hadoop.home.dir", "C:\\hadoop_home\\")

  /**
   * Demo of Spark RDD actions `countByKey` / `foreach` and the `sortBy` transformation.
   * Runs locally (`local` master); expected output is noted in the inline comments.
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("rdd")
    val sc = new SparkContext(sparkConf)
    try {
      val rdd1 = sc.makeRDD(Array(("A", 0), ("A", 2), ("B", 1), ("B", 2), ("B", 3)))
      rdd1.foreach(println(_))
      /*
      (A,0)
      (A,2)
      (B,1)
      (B,2)
      (B,3)
      */

      // countByKey: counts elements per key and returns the result to the driver as a Map.
      for (elem <- rdd1.countByKey) {
        println(elem)
      }
      /*
      (B,3)
      (A,2)
      */

      // sc.accumulator is deprecated since Spark 2.0 (removed in 3.x); use longAccumulator.
      val cnt = sc.longAccumulator("sum")
      val rdd2 = sc.makeRDD(1 to 10, 2)
      rdd2.foreach(println(_)) // prints: 1 2 3 4 5 6 7 8 9 10
      rdd2.foreach(x => cnt.add(x)) // accumulate on the executors ...
      println(cnt.value) // ... and read the total on the driver: 55

      val rdd3 = sc.makeRDD(Seq(3, 6, 7, 1, 2, 0), 2)
      rdd3.sortBy(x => x).collect.foreach(println(_)) // 0, 1, 2, 3, 6, 7 — ascending by default
      rdd3.sortBy(x => x, false).collect.foreach(println(_)) // 7, 6, 3, 2, 1, 0 — descending

      // For an RDD[(K, V)]: sorting by the whole tuple orders by key first ...
      rdd1.sortBy(x => x).collect().foreach(println(_))
      // ... or sort by the value; pass false as the second argument for descending order.
      rdd1.sortBy(x => x._2, true).collect().foreach(println(_))
      /*
      (A,0)
      (B,1)
      (A,2)
      (B,2)
      (B,3)
      */
    } finally {
      sc.stop() // release the local SparkContext even if an action above fails
    }
  }
}
// Source: "Spark operators: learning the RDD actions countByKey, foreach, sortBy"
// (blog article; latest recommended revision published 2023-05-24 17:08:24)