// Distribute the range 1..10 as an RDD split into 3 partitions. Each partition
// is processed by its own task, so up to 3 CPU cores can work on it in parallel.
val num = sc.parallelize(1 to 10, numSlices = 3)

// Transformations: lazy. These lines only describe the computation; nothing
// runs until an action is invoked on the resulting RDD.
val doublenum = num.map(n => n * 2)
val threenum = doublenum.filter(n => n % 3 == 0)

// Actions: each collect() launches a job and returns the elements to the
// driver as an Array. Because doublenum is not cached, the second job
// recomputes the map step before applying the filter.
doublenum.collect()
threenum.collect()
action(触发真正的计算,并把结果返回 driver 或写出到外部存储)
reduce collect count first take(n)(取前 n 个元素) takeSample saveAsTextFile saveAsSequenceFile
countByKey foreach(注:cache 不是 action,它只是惰性地标记 RDD 需要缓存,见下文 cache 一节)
transformation(惰性求值:只记录转换关系,遇到 action 才真正执行)
map filter flatMap mapPartitions mapPartitionsWithIndex(旧名 mapPartitionsWithSplit,已废弃) sample union distinct
groupByKey reduceByKey sortByKey join cogroup((k,v1),(k,v2) -> (k,(Iterable[v1],Iterable[v2])))
cartesian
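join 和 groupByKey 略有不同:groupByKey 把同一个 RDD 中相同 key 的 value 合并成一组(组内合并),join 则把两个 RDD 中相同 key 的 value 两两配对,得到 (k,(v1,v2))
flatMap:把每个元素映射出的多个结果展平后合并到同一个 RDD 中(组间合并)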
cache
threenum.cache() // lazy: only marks the RDD for caching; the data is materialized by the next action
// num was created with 3 partitions; each partition is processed by its own
// task, so up to 3 CPU cores can work on it in parallel.
// (This repeats the earlier snippet; in the REPL these vals simply shadow the
// previous definitions of the same names.)

// Transformations: lazy. These lines only describe the computation; nothing
// runs until an action is invoked on the resulting RDD.
val doublenum = num.map(n => n * 2)
val threenum = doublenum.filter(n => n % 3 == 0)

// Actions: each collect() launches a job and returns the elements to the
// driver as an Array. Because doublenum is not cached, the second job
// recomputes the map step before applying the filter.
doublenum.collect()
threenum.collect()
action(触发真正的计算,并把结果返回 driver 或写出到外部存储)
reduce collect count first take(n)(取前 n 个元素) takeSample saveAsTextFile saveAsSequenceFile
countByKey foreach(注:cache 不是 action,它只是惰性地标记 RDD 需要缓存,见下文 cache 一节)
transformation(惰性求值:只记录转换关系,遇到 action 才真正执行)
map filter flatMap mapPartitions mapPartitionsWithIndex(旧名 mapPartitionsWithSplit,已废弃) sample union distinct
groupByKey reduceByKey sortByKey join cogroup((k,v1),(k,v2) -> (k,(Iterable[v1],Iterable[v2])))
cartesian
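join 和 groupByKey 略有不同:groupByKey 把同一个 RDD 中相同 key 的 value 合并成一组(组内合并),join 则把两个 RDD 中相同 key 的 value 两两配对,得到 (k,(v1,v2))
flatMap:把每个元素映射出的多个结果展平后合并到同一个 RDD 中(组间合并)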
cache
threenum.cache() // lazy: only marks the RDD for caching; the data is materialized by the next action
threenum.unpersist() // eager: takes effect immediately, without waiting for an action
PROCESS_LOCAL(数据本地性级别:数据与任务在同一个 executor 进程内,速度最快)
NODE_LOCAL(数据本地性级别:数据与任务在同一节点上,但不在同一进程内,需要跨进程读取)