Spark基础transformation操作实例(Scala版)

9 篇文章 0 订阅

1.map算子

/**
 * Demonstrates the `map` transformation.
 *
 * Builds a single-partition RDD from the integers 1 to 5 on a local master,
 * multiplies every element by 2, and prints each result on its own line.
 * The SparkContext created here is always stopped before the method returns.
 */
def map(): Unit = {
  val conf = new SparkConf()
    .setAppName("map")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val numbers = Array(1, 2, 3, 4, 5)
    val numberRDD = sc.parallelize(numbers, 1)
    val doubledRDD = numberRDD.map(_ * 2)

    doubledRDD.foreach(num => println(num))
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

2.filter算子

/**
 * Demonstrates the `filter` transformation.
 *
 * Builds a single-partition RDD from the integers 1 to 10 on a local master,
 * keeps only the even values, and prints each of them on its own line.
 * The SparkContext created here is always stopped before the method returns.
 */
def filter(): Unit = {
  val conf = new SparkConf()
    .setAppName("filter")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numberRDD = sc.parallelize(numbers, 1)
    val evenNumberRDD = numberRDD.filter(_ % 2 == 0)

    evenNumberRDD.foreach(num => println(num))
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

3.flatMap算子

/**
 * Demonstrates the `flatMap` transformation.
 *
 * Builds a single-partition RDD from three short sentences on a local master,
 * splits every line on a single space, flattens the pieces into one RDD of
 * words, and prints each word on its own line.
 * The SparkContext created here is always stopped before the method returns.
 */
def flatMap(): Unit = {
  val conf = new SparkConf()
    .setAppName("flatMap")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val lineArray = Array("hello you", "hello me", "hello world")
    val lines = sc.parallelize(lineArray, 1)
    val words = lines.flatMap(_.split(" "))

    words.foreach(word => println(word))
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

4.groupByKey算子

/**
 * Demonstrates the `groupByKey` transformation.
 *
 * Builds a single-partition RDD of (class name, score) pairs on a local
 * master, groups the scores by class name, and for every class prints the
 * name, each of its scores on a separate line, and a separator line.
 * The SparkContext created here is always stopped before the method returns.
 */
def groupByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("groupByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val scoreList = Array(
      ("class1", 80),
      ("class2", 86),
      ("class1", 98),
      ("class2", 83))
    val scores = sc.parallelize(scoreList, 1)
    val groupedScores = scores.groupByKey()

    // Destructure the (key, values) pair instead of using _1 / _2 accessors.
    groupedScores.foreach { case (className, classScores) =>
      println(className)
      classScores.foreach(singleScore => println(singleScore))
      println("========================")
    }
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

5.reduceByKey算子

/**
 * Demonstrates the `reduceByKey` transformation.
 *
 * Builds a single-partition RDD of (class name, score) pairs on a local
 * master, sums the scores per class name, and prints one
 * `className:total` line per class.
 * The SparkContext created here is always stopped before the method returns.
 */
def reduceByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("reduceByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val scoreList = Array(
      ("class1", 80),
      ("class2", 86),
      ("class1", 98),
      ("class2", 83))
    val scores = sc.parallelize(scoreList, 1)
    val totalScores = scores.reduceByKey(_ + _)

    totalScores.foreach { case (className, total) =>
      println(s"$className:$total")
    }
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

6.sortByKey算子

/**
 * Demonstrates the `sortByKey` transformation.
 *
 * Builds a single-partition RDD of (score, student name) pairs on a local
 * master, sorts it by the integer key using `sortByKey()` with its default
 * arguments, and prints one `score:name` line per student.
 * The SparkContext created here is always stopped before the method returns.
 */
def sortByKey(): Unit = {
  val conf = new SparkConf()
    .setAppName("sortByKey")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val scoreList = Array(
      (78, "marry"),
      (89, "tom"),
      (72, "jack"),
      (86, "leo"))
    val scores = sc.parallelize(scoreList, 1)
    val sortedScores = scores.sortByKey()

    sortedScores.foreach { case (score, name) =>
      println(s"$score:$name")
    }
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}


7.join算子

/**
 * Demonstrates the `join` transformation.
 *
 * Builds two single-partition RDDs on a local master, one of
 * (student id, name) pairs and one of (student id, score) pairs, joins them
 * on the student id, and prints the id, name and score of every joined
 * record followed by a separator line.
 * The SparkContext created here is always stopped before the method returns.
 */
def join(): Unit = {
  val conf = new SparkConf()
    .setAppName("join")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val studentList = Array(
      (1, "leo"),
      (2, "jack"),
      (3, "tom"))

    val scoreList = Array(
      (1, 78),
      (2, 87),
      (3, 94))

    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)

    val studentScores = students.join(scores)

    // Each joined element has the shape (id, (name, score)).
    studentScores.foreach { case (id, (name, score)) =>
      println(s"student id:$id")
      println(s"student name:$name")
      println(s"student score:$score")
      println("===================================")
    }
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}

8.cogroup算子

/**
 * Demonstrates the `cogroup` transformation.
 *
 * Builds two single-partition RDDs on a local master, one of
 * (student id, name) pairs and one of (student id, score) pairs in which
 * every id appears twice, cogroups them on the student id, and prints the
 * id together with the grouped names and grouped scores of every key,
 * followed by a separator line.
 * The SparkContext created here is always stopped before the method returns.
 */
def cogroup(): Unit = {
  val conf = new SparkConf()
    .setAppName("cogroup")
    .setMaster("local")
  val sc = new SparkContext(conf)

  try {
    val studentList = Array(
      (1, "leo"),
      (2, "jack"),
      (3, "tom"))

    val scoreList = Array(
      (1, 78),
      (2, 87),
      (3, 94),
      (1, 88),
      (2, 97),
      (3, 79))

    val students = sc.parallelize(studentList, 1)
    val scores = sc.parallelize(scoreList, 1)

    val studentScores = students.cogroup(scores)

    // Each element has the shape (id, (names, scores)); both grouped sides
    // are printed through their own string representation, as before.
    studentScores.foreach { case (id, (names, groupedScores)) =>
      println(s"student id:$id")
      println(s"student name:$names")
      println(s"student score:$groupedScores")
      println("===================================")
    }
  } finally {
    // Stop the context even if the job fails, so the next demo in the same
    // JVM can create its own SparkContext.
    sc.stop()
  }
}



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值