spark-filter(1)
Requirement: keep only the odd numbers from the data source.
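The preview cuts off inside main. A minimal runnable completion under the post's package and imports; the input List(1, 2, 3, 4) and the app name "Operator" are assumptions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_filter_01 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4))
    // filter keeps the elements for which the predicate is true
    val filterRDD: RDD[Int] = rdd.filter(num => num % 2 != 0)
    filterRDD.collect().foreach(println) // 1, 3

    sc.stop()
  }
}
```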
spark-groupBy(3)
Requirement: compute the number of accesses in each hour from a log file.
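A runnable sketch under the post's imports. The file path datas/apache.log and the log layout (the 4th space-separated field is a timestamp such as 17/05/2015:10:05:03) are assumptions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import java.text.SimpleDateFormat
import java.util.Date

object RDD_groupBy_03 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd = sc.textFile("datas/apache.log")
    val hourCounts: RDD[(String, Int)] = rdd
      .map(line => {
        val time = line.split(" ")(3) // assumed timestamp field
        val date: Date = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss").parse(time)
        (new SimpleDateFormat("HH").format(date), 1) // keep only the hour
      })
      .groupBy(_._1) // group the (hour, 1) pairs by hour
      .map { case (hour, iter) => (hour, iter.size) }
    hourCounts.collect().foreach(println)

    sc.stop()
  }
}
```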
spark-groupBy(2)
Requirement: group the strings in the data source by their first letter.
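A sketch with an assumed word list as the data source:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_groupBy_02 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[String] = sc.makeRDD(List("Hello", "Spark", "Scala", "Hadoop"))
    // strings with the same first character land in the same group
    val groupRDD: RDD[(Char, Iterable[String])] = rdd.groupBy(_.charAt(0))
    groupRDD.collect().foreach(println) // (H, [Hello, Hadoop]), (S, [Spark, Scala])

    sc.stop()
  }
}
```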
spark-groupBy(1)
Requirement: group the numbers in the data source by their value modulo 2.
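A sketch assuming the input List(1, 2, 3, 4) over two partitions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_groupBy_01 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 2)
    // groupBy shuffles the data: elements with the same key (num % 2)
    // end up in the same group regardless of their source partition
    val groupRDD: RDD[(Int, Iterable[Int])] = rdd.groupBy(num => num % 2)
    groupRDD.collect().foreach(println) // (0,CompactBuffer(2, 4)), (1,CompactBuffer(1, 3))

    sc.stop()
  }
}
```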
spark-operator-flatMap(3)
Requirement: flatten List(List(1, 2), 3, List(4, 5)).
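Because the source mixes List elements with a bare Int, the mapping function pattern-matches and wraps the scalar before flattening. A runnable sketch:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_flatMap_03 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Any] = sc.makeRDD(List(List(1, 2), 3, List(4, 5)))
    val flatRDD: RDD[Any] = rdd.flatMap {
      case list: List[_] => list        // already a collection: flatten as-is
      case other         => List(other) // wrap the bare element in a List
    }
    flatRDD.collect().foreach(println) // 1 2 3 4 5

    sc.stop()
  }
}
```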
spark-operator-flatMap(2)
Requirement: extract every individual word.
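A sketch assuming two input sentences:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_flatMap_02 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[String] = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    // split each line into words; flatMap flattens the arrays into one RDD
    val wordRDD: RDD[String] = rdd.flatMap(line => line.split(" "))
    wordRDD.collect().foreach(println) // Hello Spark Hello Scala

    sc.stop()
  }
}
```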
spark-operator-flatMap(1)
Operator flatMap (flat mapping): any mapping function that returns an iterable collection satisfies the contract.
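A minimal sketch of the basic case, assuming a source of nested lists:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_flatMap_01 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[List[Int]] = sc.makeRDD(List(List(1, 2), List(3, 4)))
    // the mapping function may return any iterable collection;
    // flatMap concatenates all returned collections into a single RDD
    val flatRDD: RDD[Int] = rdd.flatMap(list => list)
    flatRDD.collect().foreach(println) // 1 2 3 4

    sc.stop()
  }
}
```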
spark-mapPartitionsWithIndex
Operator mapPartitionsWithIndex: sends the data to the compute node one partition at a time; the processing can be arbitrary (even filtering data away), and the current partition index is available during processing. Requirement: get the data of the second partition.
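A runnable sketch assuming List(1, 2, 3, 4) split over two partitions; the object name is also an assumption, since the preview cuts off before it:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_mapPartitionsWithIndex_01 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 2)
    // keep the data of the second partition (index 1);
    // every other partition returns an empty iterator
    val mpiRDD: RDD[Int] = rdd.mapPartitionsWithIndex((index, iter) =>
      if (index == 1) iter else Nil.iterator
    )
    mpiRDD.collect().foreach(println) // 3, 4

    sc.stop()
  }
}
```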
spark-mapPartitions exercise
Requirement: get the maximum value of each partition.
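A sketch assuming four numbers over two partitions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_mapPartitions_02 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 2)
    // the function gets a whole partition as an iterator and must return
    // an iterator, so wrap the partition maximum in a one-element List
    val maxRDD: RDD[Int] = rdd.mapPartitions(iter => List(iter.max).iterator)
    maxRDD.collect().foreach(println) // 2, 4

    sc.stop()
  }
}
```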
spark-mapPartitions vs. map
map invokes its function once per element, while mapPartitions invokes it once per partition and receives the whole partition as an iterator.
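A sketch contrasting the two operators with console output; the data and the println markers are assumptions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_map_04 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 2)

    // map: prints once per element (4 times)
    rdd.map(num => {
      println(">>> map element " + num)
      num * 2
    }).collect()

    // mapPartitions: prints once per partition (2 times); batching is faster,
    // but one function call handles the whole partition's iterator,
    // so long-lived references can exhaust memory
    rdd.mapPartitions(iter => {
      println(">>> mapPartitions partition")
      iter.map(_ * 2)
    }).collect()

    sc.stop()
  }
}
```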
spark-map operator (3): parallel execution demo
Demonstrates the effect of partitioning on execution order: within a partition, each element flows through the whole operator chain before the next element starts; across partitions, elements are processed in parallel and the output interleaves.
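A sketch of the demo; the data and the printed markers are assumptions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_map_03 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    // with 1 partition, each element passes through both maps before the
    // next element starts; change the partition count to 2 and the ">>>"
    // and "###" lines from different partitions interleave
    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 1)
    val mapRDD: RDD[Int] = rdd.map(num => {
      println(">>> " + num)
      num
    })
    val mapRDD1: RDD[Int] = mapRDD.map(num => {
      println("### " + num)
      num
    })
    mapRDD1.collect()

    sc.stop()
  }
}
```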
spark-map operator (2): file processing example
A map example that processes a data file line by line.
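A sketch assuming the classic apache.log exercise: the path datas/apache.log and the layout (the 7th space-separated field of each line is the requested URL) are assumptions:

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_map_02 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd = sc.textFile("datas/apache.log")
    // extract the assumed URL field from each space-separated line
    val urlRDD: RDD[String] = rdd.map(line => line.split(" ")(6))
    urlRDD.collect().foreach(println)

    sc.stop()
  }
}
```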
spark-map operator (1)
A first example of the map operator.
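A minimal sketch, assuming the input List(1, 2, 3, 4):

```scala
package com.atguigu.bigdata.spark.core.operator.transform.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD_map_01 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4))
    // map transforms each element one-to-one
    val mapRDD: RDD[Int] = rdd.map(_ * 2)
    mapRDD.collect().foreach(println) // 2 4 6 8

    sc.stop()
  }
}
```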
spark-parallelism & partitioning (setting the partition count)
Shows how to control the number of partitions when creating an RDD from a collection. In local[*] mode, * means the maximum number of cores available on the current machine; when no partition count is given, makeRDD falls back to the default parallelism.
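A runnable sketch; the data, the partition count, and the output directory are assumptions:

```scala
package com.atguigu.bigdata.spark.core.rdd.builder

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Spark01_RDD_Memory_Par {
  def main(args: Array[String]): Unit = {
    // TODO prepare the environment
    // local[*]: use every core available on the current machine
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD")
    // the default parallelism could also be set explicitly:
    // sparkConf.set("spark.default.parallelism", "5")
    val sc = new SparkContext(sparkConf)

    // the second argument of makeRDD sets the number of partitions;
    // when omitted it falls back to spark.default.parallelism
    // (for local[*], the number of available cores)
    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4), 2)
    rdd.saveAsTextFile("output") // writes one file per partition

    sc.stop()
  }
}
```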
spark-creating the first RDD
Creates an RDD from an in-memory collection. In local[*] mode, * means the maximum number of cores available on the current machine, while plain local simulates execution with a single thread.
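A minimal sketch of the post's setup; the sample data is an assumption:

```scala
package com.atguigu.bigdata.spark.core.rdd.builder

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Spark01_RDD_Memory {
  def main(args: Array[String]): Unit = {
    // TODO prepare the environment
    // local[*]: all available cores; plain "local" runs on a single thread
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD")
    val sc = new SparkContext(sparkConf)

    // create an RDD from an in-memory collection;
    // makeRDD is a convenience method that calls parallelize internally
    val seq = Seq(1, 2, 3, 4)
    val rdd: RDD[Int] = sc.makeRDD(seq)
    rdd.collect().foreach(println)

    sc.stop()
  }
}
```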
spark-textFile vs. wholeTextFiles
textFile reads data line by line, while wholeTextFiles reads whole files at a time and yields (filePath, fileContent) pairs.
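A sketch; the datas input directory is an assumption:

```scala
package com.atguigu.bigdata.spark.core.rdd.builder

import org.apache.spark.{SparkConf, SparkContext}

object Spark02_RDD_File02 {
  def main(args: Array[String]): Unit = {
    // TODO prepare the environment
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD")
    val sc = new SparkContext(sparkConf)

    // wholeTextFiles reads file by file and yields (filePath, fileContent)
    // tuples, whereas textFile would read the same directory line by line
    val rdd = sc.wholeTextFiles("datas")
    rdd.collect().foreach(println)

    sc.stop()
  }
}
```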
spark-using textFile
Creates an RDD from a file with textFile; the path can point to a single file, a directory, or a wildcard pattern.
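A sketch; the path datas/1.txt is an assumption:

```scala
package com.atguigu.bigdata.spark.core.rdd.builder

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Spark02_RDD_File {
  def main(args: Array[String]): Unit = {
    // TODO prepare the environment
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD")
    val sc = new SparkContext(sparkConf)

    // the path is resolved relative to the project root and may also be
    // a directory or a wildcard such as datas/1*.txt
    val rdd: RDD[String] = sc.textFile("datas/1.txt")
    rdd.collect().foreach(println)

    sc.stop()
  }
}
```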