// Spark RDD 存储单元
// Example 1: build an RDD from a single text file, then count and fetch its lines.
// Intended for spark-shell, which provides the SparkContext `sc`.
//
// Prerequisite: run the following in a terminal (it is a shell command, not Scala):
//   hadoop fs -put /home/training/training_materials/data/frostroad.txt /loudacre/frostroad.txt
val myrdd = sc.textFile("/loudacre/frostroad.txt")
// Action: number of elements in the RDD (textFile yields one element per line).
myrdd.count()
// Action: brings every element back to the driver as an Array.
// NOTE(review): only appropriate when the file is small enough to fit in driver memory.
myrdd.collect()
// Example 2: load a directory of web logs, then filter and map over the lines.
// Intended for spark-shell, which provides the SparkContext `sc`.
//
// Prerequisite: run the following in a terminal (it is a shell command, not Scala).
// `hadoop fs` is used instead of the deprecated `hadoop dfs`, matching the other examples:
//   hadoop fs -put /home/training/training_materials/data/weblogs/ /loudacre/
val logfiles = "/loudacre/weblogs/*" // wildcard: every file under the uploaded directory
val logsRDD = sc.textFile(logfiles)
logsRDD.count()
// Print the first line to see what a record looks like.
logsRDD.take(1).foreach(println)
// Keep only the lines that contain the substring ".jpg".
val jpglogsRDD = logsRDD.filter(_.contains(".jpg"))
jpglogsRDD.take(10).foreach(println)
jpglogsRDD.count()
// Character length of each of the first five lines.
logsRDD.map(_.length).take(5)
// Example 3: print the first two lines of a text file in two equivalent ways.
// Intended for spark-shell, which provides the SparkContext `sc`.
//
// Prerequisite: run the following in a terminal (it is a shell command, not Scala):
//   hadoop fs -put /home/training/training_materials/data/purplecow.txt /loudacre/purplecow.txt
val mydata = sc.textFile("/loudacre/purplecow.txt")
// Form 1: for-loop over the Array returned by take(2).
// Kept on one line so it is a complete statement when entered line by line in the REPL.
for (line <- mydata.take(2)) println(line)
// Form 2: the same output using foreach on the Array.
mydata.take(2).foreach(println)