1. map算子 (the map operator: applies a function to every element)
/** Demonstrates the `map` transformation: doubles each element of an RDD
  * and prints the results. Runs locally with a single partition so the
  * printed order matches the input order.
  */
def map(): Unit = {
  val conf = new SparkConf()
    .setAppName("map")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val numbers = Array(1, 2, 3, 4, 5)
    val numberRDD = sc.parallelize(numbers, 1)
    val multipleNumberRDD = numberRDD.map(num => num * 2)
    multipleNumberRDD.foreach(num => println(num))
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
2. filter算子 (the filter operator: keeps elements matching a predicate)
/** Demonstrates the `filter` transformation: keeps only the even numbers
  * from 1..10 and prints them. Runs locally with a single partition.
  */
def filter(): Unit = {
  val conf = new SparkConf()
    .setAppName("filter")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numberRDD = sc.parallelize(numbers, 1)
    val evenNumberRDD = numberRDD.filter(num => num % 2 == 0)
    evenNumberRDD.foreach(num => println(num))
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
3. flatMap算子 (the flatMap operator: maps each element to zero or more elements)
/** Demonstrates the `flatMap` transformation: splits each line into words,
  * flattening the per-line word arrays into a single RDD of words.
  */
def flatMap(): Unit = {
  val conf = new SparkConf()
    .setAppName("flatMap")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val lineArray = Array("hello you", "hello me", "hello world")
    val lines = sc.parallelize(lineArray, 1)
    // Each line yields several words; flatMap concatenates them all.
    val words = lines.flatMap(line => line.split(" "))
    words.foreach(word => println(word))
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
4. groupByKey算子 (the groupByKey operator: groups all values sharing a key)
/** Demonstrates the `groupByKey` transformation: groups scores by class name
  * and prints each class followed by all of its scores.
  */
def groupByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("groupByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    // Tuple literal syntax ("k", v) is the idiomatic form of Tuple2("k", v).
    val scoreList = Array(("class1", 80),
      ("class2", 86), ("class1", 98),
      ("class2", 83))
    // Fixed typo: was misspelled "socres".
    val scores = sc.parallelize(scoreList, 1)
    // groupByKey yields (key, Iterable[value]) pairs.
    val groupScores = scores.groupByKey()
    groupScores.foreach { score =>
      println(score._1)
      score._2.foreach(singleScore => println(singleScore))
      println("========================")
    }
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
5. reduceByKey算子 (the reduceByKey operator: combines values per key with a reduce function)
/** Demonstrates the `reduceByKey` transformation: sums the scores of each
  * class and prints "className:total" per class.
  */
def reduceByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("reduceByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val scoreList = Array(("class1", 80),
      ("class2", 86), ("class1", 98),
      ("class2", 83))
    val scores = sc.parallelize(scoreList, 1)
    // _ + _ sums all values that share the same key.
    val totalScores = scores.reduceByKey(_ + _)
    totalScores.foreach(classScore => println(classScore._1 + ":" + classScore._2))
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
6. sortByKey算子 (the sortByKey operator: sorts a pair RDD by its keys)
/** Demonstrates the `sortByKey` transformation: sorts (score, name) pairs
  * by score in ascending order (the default) and prints them.
  */
def sortByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("sortByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val scoreList = Array((78, "marry"),
      (89, "tom"),
      (72, "jack"),
      (86, "leo"))
    val scores = sc.parallelize(scoreList, 1)
    // Default ordering is ascending by key (the score).
    val sortedScores = scores.sortByKey()
    sortedScores.foreach(studentScore => println(studentScore._1 + ":" + studentScore._2))
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
// 7. join算子 (the join operator: inner-joins two pair RDDs on their keys)
/** Demonstrates the `join` transformation: inner-joins students (id, name)
  * with scores (id, score), producing (id, (name, score)) pairs.
  */
def join(): Unit = {
  val conf = new SparkConf()
    .setAppName("join")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val studentList = Array(
      (1, "leo"),
      (2, "jack"),
      (3, "tom"))
    val scoreList = Array(
      (1, 78),
      (2, 87),
      (3, 94))
    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)
    // join keeps only keys present in both RDDs: (id, (name, score)).
    val studentScores = students.join(scores)
    studentScores.foreach { studentScore =>
      println("student id:" + studentScore._1)
      println("student name:" + studentScore._2._1)
      println("student score:" + studentScore._2._2)
      println("===================================")
    }
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}
8. cogroup算子 (the cogroup operator: groups values from both RDDs per key)
/** Demonstrates the `cogroup` transformation: for every key, collects ALL
  * matching values from both RDDs, yielding
  * (id, (Iterable[name], Iterable[score])) — unlike `join`, which pairs
  * values one-to-one.
  */
def cogroup(): Unit = {
  val conf = new SparkConf()
    .setAppName("cogroup")
    .setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val studentList = Array(
      (1, "leo"),
      (2, "jack"),
      (3, "tom"))
    // Each student id appears twice here, so every id maps to two scores.
    val scoreList = Array(
      (1, 78),
      (2, 87),
      (3, 94),
      (1, 88),
      (2, 97),
      (3, 79))
    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)
    val studentScores = students.cogroup(scores)
    studentScores.foreach { studentScore =>
      println("student id:" + studentScore._1)
      println("student name:" + studentScore._2._1)
      println("student score:" + studentScore._2._2)
      println("===================================")
    }
  } finally {
    // Always release the SparkContext, even if an action fails.
    sc.stop()
  }
}