Various Spark transformation operators (Scala version)

package cn.spark.study.core

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object TransformationOperation {
  def main(args: Array[String]): Unit = {
    // mapTest()
    // filterTest()
    // flatMapTest()
    // groupByKeyTest()
    // reduceByKeyTest()
    sortByKeyTest()
  }

  /**
   * map operator
   * Multiply every element of the collection by 2.
   */
  def mapTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("map")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val numbers = Array(1, 2, 3, 4, 5)
    val numberRDD = sc.parallelize(numbers, 1)
    // map applies the function to every element, producing one output per input
    val multipleNumberRDD = numberRDD.map { number => number * 2 }
    multipleNumberRDD.foreach { num => println(num) } // prints 2, 4, 6, 8, 10
  }

  /**
   * filter operator
   * Keep only the even numbers in the collection.
   */
  def filterTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("filter")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numberRDD = sc.parallelize(numbers, 1)
    // filter keeps the elements for which the predicate is true
    val evenNumberRDD = numberRDD.filter { number => number % 2 == 0 }
    evenNumberRDD.foreach { num => println(num) } // prints 2, 4, 6, 8, 10
  }

  /**
   * flatMap operator
   * Split lines of text into individual words.
   */
  def flatMapTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("flatMap")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val lines = Array("hello you", "hello me", "hello world")
    val linesRDD = sc.parallelize(lines, 1)
    // flatMap maps each line to several words and flattens the results into one RDD
    val wordsRDD = linesRDD.flatMap { line => line.split(" ") }
    wordsRDD.foreach { word => println(word) } // prints hello, you, hello, me, hello, world
  }
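
  /**
   * A minimal follow-up sketch (the method name and sample data are
   * assumptions added for illustration): flatMap is the first step of the
   * classic word count. Split lines into words, pair each word with 1, then
   * sum the counts per word with reduceByKey, which is demonstrated below.
   */
  def wordCountSketch(): Unit = {
    val conf = new SparkConf()
      .setAppName("wordCount")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val lines = Array("hello you", "hello me", "hello world")
    val linesRDD = sc.parallelize(lines, 1)
    // split -> pair each word with 1 -> sum the 1s per word
    val wordCounts = linesRDD
      .flatMap { line => line.split(" ") }
      .map { word => (word, 1) }
      .reduceByKey(_ + _)
    wordCounts.foreach(wc => println(wc._1 + ": " + wc._2)) // hello: 3, you: 1, me: 1, world: 1
  }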

  /**
   * groupByKey operator
   * Example: group scores by class.
   */
  def groupByKeyTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("groupByKey")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array(("class1", 80), ("class2", 90), ("class1", 65), ("class2", 85))
    val scoresRDD = sc.parallelize(scores, 1)
    // groupByKey yields an RDD[(String, Iterable[Int])]
    val groupedScores = scoresRDD.groupByKey()
    groupedScores.foreach(score => {
      println("class: " + score._1)
      score._2.foreach { s => println(s) }
      println("===============")
    })
  }
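
  /**
   * A minimal follow-up sketch (the method name and the use of mapValues
   * here are assumptions added for illustration): because groupByKey returns
   * an RDD[(String, Iterable[Int])], per-key statistics such as each class's
   * average score can be derived from the grouped values.
   */
  def groupByKeyAverageSketch(): Unit = {
    val conf = new SparkConf()
      .setAppName("groupByKeyAverage")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array(("class1", 80), ("class2", 90), ("class1", 65), ("class2", 85))
    val scoresRDD = sc.parallelize(scores, 1)
    // mapValues keeps the key and transforms only the grouped values
    val avgScores = scoresRDD.groupByKey().mapValues(s => s.sum.toDouble / s.size)
    avgScores.foreach(avg => println(avg._1 + ": " + avg._2)) // class1: 72.5, class2: 87.5
  }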

  /**
   * reduceByKey operator
   * Example: compute the total score of each class.
   */
  def reduceByKeyTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("reduceByKey")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array(("class1", 80), ("class2", 90), ("class1", 65), ("class2", 85))
    val scoresRDD = sc.parallelize(scores, 1)
    // _ + _ sums the two values merged at each step for a key
    val totalScores = scoresRDD.reduceByKey(_ + _)
    totalScores.foreach(classScore => println(classScore._1 + ": " + classScore._2)) // class1: 145, class2: 175
  }
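
  /**
   * A minimal follow-up sketch (the method name and sample data are
   * assumptions added for illustration): the same reduceByKey pattern counts
   * records per key when every value is first mapped to 1. Unlike groupByKey,
   * reduceByKey combines values on the map side before the shuffle, so it is
   * usually preferred for aggregations.
   */
  def countPerClassSketch(): Unit = {
    val conf = new SparkConf()
      .setAppName("countPerClass")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array(("class1", 80), ("class2", 90), ("class1", 65), ("class2", 85))
    val scoresRDD = sc.parallelize(scores, 1)
    // replace each score with 1, then sum the 1s per class
    val counts = scoresRDD.mapValues(score => 1).reduceByKey(_ + _)
    counts.foreach(c => println(c._1 + ": " + c._2)) // class1: 2, class2: 2
  }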

  /**
   * sortByKey operator
   * Example: sort student scores.
   */
  def sortByKeyTest(): Unit = {
    val conf = new SparkConf()
      .setAppName("sortByKey")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array((60, "leo"), (100, "ksc"), (99, "my"), (10, "jack"))
    val scoresRDD = sc.parallelize(scores, 1)
    // ascending = false sorts the keys in descending order; 1 is the partition count
    val sortedScores = scoresRDD.sortByKey(false, 1)
    sortedScores.foreach(score => println(score._1 + " : " + score._2)) // 100 : ksc, 99 : my, 60 : leo, 10 : jack
  }
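
  /**
   * A minimal follow-up sketch (the method name and sample data are
   * assumptions added for illustration): to order pairs by value rather than
   * by key, swap each tuple before and after sortByKey. Here (name, score)
   * pairs are ordered by descending score.
   */
  def sortByValueSketch(): Unit = {
    val conf = new SparkConf()
      .setAppName("sortByValue")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val scores = Array(("leo", 60), ("ksc", 100), ("my", 99), ("jack", 10))
    val scoresRDD = sc.parallelize(scores, 1)
    // swap to (score, name), sort on the numeric key, then swap back
    val sortedByScore = scoresRDD.map(_.swap).sortByKey(false, 1).map(_.swap)
    sortedByScore.foreach(s => println(s._1 + ": " + s._2)) // ksc: 100, my: 99, leo: 60, jack: 10
  }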
}
