Creating an RDD in Spark by parallelizing a collection
![Insert image description here](https://i-blog.csdnimg.cn/blog_migrate/6b6c6615e6f019c49726c97bc284db5c.png)
```scala
import org.apache.spark.{SparkConf, SparkContext}

object ParallelizeCollection {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ParallelizeRDD").setMaster("local")
    val sc = new SparkContext(conf)
    // Distribute a local Scala collection into an RDD with 5 partitions
    val array = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val rdd = sc.parallelize(array, 5)
    // reduce is an action: it aggregates all elements and returns the result to the driver
    val sum = rdd.reduce((x, y) => x + y)
    println("Sum: " + sum)
    sc.stop()
  }
}
```
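The second argument to `parallelize` sets how many partitions the resulting RDD is split into. As a quick sanity check, a minimal sketch like the following (reusing the `sc` from above; the values are just for illustration) confirms both the partition count and that `reduce` sees every element:

```scala
// Minimal sketch: verify partitioning of a parallelized collection
val rdd = sc.parallelize(1 to 10, 5)
println(rdd.getNumPartitions) // expected: 5
println(rdd.reduce(_ + _))    // expected: 55
```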
Creating an RDD in Spark from local files
![Insert image description here](https://i-blog.csdnimg.cn/blog_migrate/45088bd5a64b93939af76d41501df49d.png)
```scala
import java.io.{BufferedReader, InputStreamReader}
import java.util.zip.ZipInputStream

import org.apache.spark.input.PortableDataStream
import org.apache.spark.{SparkConf, SparkContext}

object LocalFileRDD {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("LocalFileRDD").setMaster("local")
    val sc = new SparkContext(conf)
    // Plain text file: one RDD element per line
    val rdd1 = sc.textFile("/Users/wuwang/Desktop/data.txt")
    // textFile does not unpack .zip archives, so this yields raw bytes as "lines"
    val rdd2 = sc.textFile("/Users/wuwang/Desktop/data.zip")
    // binaryFiles returns (path, PortableDataStream) pairs; unpack the zip manually
    val rdd5 = sc.binaryFiles("/Users/wuwang/Desktop/data.zip")
    val rdd = rdd5.flatMap {
      case (name: String, content: PortableDataStream) =>
        val zis = new ZipInputStream(content.open())
        // Walk every entry in the archive and read it line by line
        Stream.continually(zis.getNextEntry)
          .takeWhile(_ != null)
          .flatMap { _ =>
            val reader = new BufferedReader(new InputStreamReader(zis))
            Stream.continually(reader.readLine()).takeWhile(_ != null)
          }
    }
    // A directory path makes textFile read every file under it
    val rdd3 = sc.textFile("/Users/wuwang/Desktop/data/")
    sc.stop()
  }
}
```
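A note on the zip handling above: `textFile` decompresses common codecs such as `.gz` transparently, but it does not unpack `.zip` archives, which is why the snippet falls back to `binaryFiles` plus `ZipInputStream`. For directory input there is one more option worth knowing; the sketch below is illustrative only and assumes the same desktop paths as above:

```scala
// Sketch: two ways to read a directory of text files
val lines = sc.textFile("/Users/wuwang/Desktop/data/")       // one element per line, all files merged
val files = sc.wholeTextFiles("/Users/wuwang/Desktop/data/") // one (path, fileContent) pair per file
println(lines.count())
files.collect().foreach { case (path, content) => println(s"$path -> ${content.length} chars") }
```

`wholeTextFiles` is usually the more convenient choice when the directory holds many small files and each file should be processed as a unit.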