Spark:Scala实现常用的Transformation操作

源码如下(在 main 方法中取消对应行的注释,即可运行相应算子的示例):

package cn.spark.study.core

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkConf
import sun.reflect.generics.scope.ClassScope

/**
 * Small, self-contained demonstrations of common Spark RDD transformations
 * (map, filter, flatMap, groupByKey, reduceByKey, sortByKey, join, cogroup).
 *
 * Each example builds a tiny in-memory data set, applies one transformation and
 * prints the result with `RDD.foreach`. All examples use the "local" master.
 */
object TransformationOperation {

  /**
   * Entry point. Runs the example(s) that are not commented out below.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Uncomment the example(s) you want to run.
    //map()
    //filter()
    //flatMap()
    groupByKey()
    //reduceByKey()
    //sortByKey()
    //join()
    //coGroup()
  }

  /**
   * Creates a SparkContext with master "local" and the given application name,
   * hands it to `body`, and always stops it afterwards.
   *
   * Stopping the context releases its resources and is required before another
   * context can be created in the same JVM, so several examples can be enabled
   * in `main` at once.
   *
   * @param appName application name registered with Spark
   * @param body    work to perform with the live context
   */
  private def withLocalContext(appName: String)(body: SparkContext => Unit): Unit = {
    val sparkConf = new SparkConf().setAppName(appName).setMaster("local")
    val context = new SparkContext(sparkConf)
    try {
      body(context)
    } finally {
      context.stop()
    }
  }

  /** map: doubles every number of 1..5 and prints each result. */
  def map(): Unit = withLocalContext("map") { sc =>
    val numberRDD = sc.parallelize(Array(1, 2, 3, 4, 5), 1)
    val doubled = numberRDD.map(_ * 2)
    doubled.foreach(n => println(n))
  }

  /** filter: keeps only the even numbers of the sample array and prints them. */
  def filter(): Unit = withLocalContext("filter") { sc =>
    val numberRDD = sc.parallelize(Array(1, 2, 4, 8, 10, 12, 13, 14, 16, 17), 1)
    val evens = numberRDD.filter(_ % 2 == 0)
    evens.foreach(n => println(n))
  }

  /** flatMap: splits every line on a single space and prints each word. */
  def flatMap(): Unit = withLocalContext("flatMap") { sc =>
    val lines = sc.parallelize(Array("hello you", "hello me", "hello world"), 1)
    val words = lines.flatMap(_.split(" "))
    words.foreach(word => println(word))
  }

  /** groupByKey: groups (class, score) pairs by class and prints every group. */
  def groupByKey(): Unit = withLocalContext("groupByKey") { sc =>
    val scoreList = Array(("class1", 80), ("class2", 90), ("class1", 70), ("class2", 83))
    val grouped = sc.parallelize(scoreList, 1).groupByKey()

    grouped.foreach { case (className, classScores) =>
      println(className)
      classScores.foreach(single => println(single))
      println("==========================")
    }
  }

  /** reduceByKey: sums the scores of each class and prints "class:total". */
  def reduceByKey(): Unit = withLocalContext("reduceByKey") { sc =>
    val scoreList = Array(("class1", 80), ("class2", 90), ("class1", 70), ("class2", 83))
    val totals = sc.parallelize(scoreList, 1).reduceByKey(_ + _)

    totals.foreach { case (className, total) => println(className + ":" + total) }
  }

  /** sortByKey: sorts (score, name) pairs by score in descending order and prints them. */
  // The app name used to be "reduceByKey" (copy-paste slip); it now matches the example.
  def sortByKey(): Unit = withLocalContext("sortByKey") { sc =>
    val scoreList = Array((65, "leo"), (80, "tom"), (95, "marry"), (74, "jack"))
    val sorted = sc.parallelize(scoreList, 1).sortByKey(ascending = false)

    sorted.foreach { case (score, name) => println(score + ":" + name) }
  }

  /** join: joins (id, name) with (id, score) on id and prints one record per match. */
  def join(): Unit = withLocalContext("join") { sc =>
    val studentList = Array((1, "leo"), (2, "jack"), (3, "tom"))
    val scoreList = Array((1, 100), (2, 90), (3, 70))

    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)

    students.join(scores).foreach { case (id, (name, score)) =>
      println("student id:" + id)
      println("student name:" + name)
      println("student score:" + score)
      println("====================")
    }
  }

  /**
   * cogroup: for every student id, collects all names and all scores carrying
   * that id and prints both collections (each is an Iterable, printed as-is).
   */
  // The app name used to be "join" (copy-paste slip); it now matches the example.
  def coGroup(): Unit = withLocalContext("coGroup") { sc =>
    val studentList = Array((1, "leo"), (2, "jack"), (3, "tom"))
    val scoreList = Array((1, 100), (2, 90), (3, 70), (1, 95), (2, 80), (3, 60))

    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)

    students.cogroup(scores).foreach { case (id, (names, idScores)) =>
      println("student id:" + id)
      println("student name:" + names)
      println("student score:" + idScores)
      println("====================")
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值