// Driver-side result buffer, kept as a member so code outside `main` can read it.
// It is only ever mutated on the driver, never from inside an RDD closure.
var tuples = ArrayBuffer[(Long, Int)]()

/** Builds `(value, 1)` pairs from a small integer array with Spark and prints them.
  *
  * The pairs are produced by a pure `map` on the executors and brought back to the
  * driver with `collect()`; only then are they appended to `tuples`. Appending to
  * `tuples` from inside the `map` closure is not reliable: per the Spark programming
  * guide, executors work on their own copies of closure state, so such updates may
  * only become visible to the driver when running in local mode.
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  val ints = Array(1, 2, 3, 4)
  val conf = new SparkConf().setAppName("test").setMaster("local")
  val sc = new SparkContext(conf)
  try {
    // Pure transformation: each element becomes a (Long, 1) pair; no shared state is touched.
    val pairs = sc.parallelize(ints).map(u => (u.toLong, 1)).collect()
    // Back on the driver: safe to update the member buffer here.
    tuples ++= pairs
    println(tuples.toString())
  } finally {
    // Release the context's resources even if the job fails.
    sc.stop()
  }
}
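注意:分布式计算中如果想更新集合或者数组,要将集合或者数组定义成成员变量,否则加不进去。但这种做法只在 local 模式(Driver 与 Executor 处于同一个 JVM)下有效;在集群模式下,每个 Executor 运行在各自的 JVM 中,修改的只是该 JVM 内的集合,Driver 端的集合不会被更新。更可靠的做法是让 map 返回结果后用 collect() 取回 Driver 端,或者使用累加器(Accumulator)。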