Simple WordCount implementations in Spark (spark中wordcount的简单实现)

package com.atguigu.spark.core.RDD2

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @author Demigod_zhang
 * @create 2020-09-05 20:49
 */
/**
 * WordCount demo: the same word-count computation expressed with several
 * different RDD operators. Exactly one variant should be uncommented at a time.
 */
object Wc_Test {
  def main(args: Array[String]): Unit = {
    // Local-mode Spark setup; "local[*]" uses all available cores.
    val sparkConf: SparkConf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
    val sc = new SparkContext(sparkConf)
    // NOTE: lowerCamelCase `rdd` avoids shadowing the imported type `org.apache.spark.rdd.RDD`.
    val rdd: RDD[String] = sc.makeRDD(List("Hello", "Hello", "Spark", "Spark", "scala"), 2)

    // Variant 1: groupBy — groups identical words, then counts each group's size.
//    val newRDD: RDD[(String, Int)] = rdd.groupBy(word => word).map(tup => (tup._1, tup._2.size))
//    newRDD.collect().foreach(println)

    // Variant 2: reduceByKey — map to (word, 1) pairs, then sum per key (pre-aggregates on map side).
//    val newRDD: RDD[(String, Int)] = rdd.map(word => (word, 1)).reduceByKey(_ + _)
//    newRDD.collect().foreach(println)

    // Variant 3: groupByKey — shuffle all (word, 1) pairs per key, then sum the grouped values.
//    val newRDD: RDD[(String, Int)] = rdd.map(word => (word, 1)).groupByKey().map(tup => (tup._1, tup._2.sum))
//    newRDD.collect().foreach(println)

    // Variant 4: aggregateByKey — zero value 0, same function (+) within and across partitions.
//    val newRDD: RDD[(String, Int)] = rdd.map(word => (word, 1)).aggregateByKey(0)(_ + _, _ + _)
//    newRDD.collect().foreach(println)

    // Variant 5: foldByKey — shorthand for aggregateByKey when intra- and inter-partition functions match.
//    val newRDD: RDD[(String, Int)] = rdd.map(word => (word, 1)).foldByKey(0)(_ + _)
//    newRDD.collect().foreach(println)

    // Variant 6: combineByKey — createCombiner / mergeValue / mergeCombiners spelled out explicitly.
//    rdd.map(word => (word, 1)).combineByKey(
//      num => num,
//      (count: Int, num: Int) => count + num,
//      (count1: Int, count2: Int) => count1 + count2
//    ).collect().foreach(println)

    // Variant 7: countByKey — an action; returns the counts to the driver as a local Map.
//    val newRDD: collection.Map[String, Long] = rdd.map(word => (word, 1)).countByKey()
//    newRDD.foreach(println)

    // Variant 8: countByValue — an action; counts occurrences of each element directly.
//    val newRDD: collection.Map[String, Long] = rdd.countByValue()
//    newRDD.foreach(println)

    // Variant 9: cogroup — joins with a second RDD; the extra ("a", 0) entry is filtered back out.
//    val rdd2 = sc.makeRDD(List(("a", 0)))
//    val newRDD: RDD[(String, Int)] = rdd.map((_, 1)).cogroup(rdd2).map {
//      case (word, (iter1, iter2)) =>
//        (word, iter1.size + iter2.size)
//    }.filter(tup => tup._1 != "a")
//    newRDD.collect().foreach(println)

    // Variant 10: mapValues — transforms only the grouped values, keeping the key untouched.
//    val newRDD: RDD[(String, Int)] = rdd.map((_, 1)).groupBy(word => word).mapValues(_.size).map { case ((word, num), count) => (word, count) }
//    newRDD.collect().foreach(println)

    // Release the SparkContext; the original demo leaked it.
    sc.stop()
  }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值