Spark去重操作

import Utils.SparkUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
 * Counts the occurrences of every whitespace-separated token in `data/cc.txt`
 * using `reduceByKey`, and prints one `(token,count)` pair per distinct token.
 *
 * Sample output recorded by the original author (each input line is a single
 * token such as `2,cc,19,sing`; ordering is not guaranteed):
 * {{{
 * (4,ny,21,cook,1)
 * (3,xx,20,cat,2)
 * (1,wnn,18,dangce,1)
 * (2,cc,19,sing,3)
 * }}}
 */
object ReduceByKeyDemo {
  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): assumes SparkUtils.getSparkContext() hands out a context owned by
    // this program, so it is safe to stop it when the job is done — confirm.
    val sc: SparkContext = SparkUtils.getSparkContext()
    try {
      val lines: RDD[String] = sc.textFile("data/cc.txt")

      // Splitting a blank line (or a line with leading whitespace) on "\\s+" yields
      // an empty string; drop it so that "" is not counted as a token.
      val tokens: RDD[String] = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)

      // Pair every token with 1, e.g. (2,cc,19,sing,1), then sum the ones per token.
      val pairs: RDD[(String, Int)] = tokens.map((_, 1))
      val counts: RDD[(String, Int)] = pairs.reduceByKey(_ + _)

      // Bring the (small, one entry per distinct token) result to the driver before
      // printing; rdd.foreach(println) would print on the executors instead.
      counts.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
import Utils.SparkUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
 * Demonstrates removing duplicates from an RDD by hand: key every element by
 * itself, then keep a single value per key with `reduceByKey`.
 *
 * Prints one `(value,value)` pair per distinct element of the input list.
 *
 * Alternatives the original author experimented with (kept here for reference):
 *  - collect to the driver and convert to a `Set`:
 *    `rdd.collect().toSet`
 *  - group equal elements and keep only the group key:
 *    `rdd.groupBy(e => e).map(_._1)`
 */
object DistinctDemo {
  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): assumes SparkUtils.getSparkContext() hands out a context owned by
    // this program, so it is safe to stop it when the job is done — confirm.
    val sc: SparkContext = SparkUtils.getSparkContext()
    try {
      // Input with duplicates, spread over 2 partitions.
      val numbers: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4, 5, 3, 2, 4), 2)

      // (1,1), (2,2), (3,3), ... — duplicates still present at this point.
      val keyed: RDD[(Int, Int)] = numbers.keyBy(e => e)

      // All values under a key are equal, so keeping the first one is enough;
      // this leaves exactly one pair per distinct element.
      val deduplicated: RDD[(Int, Int)] = keyed.reduceByKey((first, _) => first)

      // Bring the small result to the driver before printing;
      // rdd.foreach(println) would print on the executors instead.
      deduplicated.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值