文件路径:/home/training/training_materials/dev1/exercises/spark-application/countjpgs/src/main/scala/stubs/CountJPGs.scala
编辑这个文件,代码如下:
package stubs
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
/** Spark application that counts the web-log requests referring to JPG files.
  *
  * Usage: `CountJPGs <file>`, where `<file>` is a path readable by
  * `SparkContext.textFile`.
  */
object CountJPGs {

  // Zero-based index of the space-separated field that is inspected for ".jpg".
  // NOTE(review): presumably the request path of an Apache-style access log
  // line -- confirm against the actual log format.
  private val RequestFieldIndex = 6

  /** Entry point.
    *
    * Prints a usage message to stderr and exits with status 1 when no input
    * path is supplied; otherwise prints the number of matching log lines.
    *
    * @param args command-line arguments; `args(0)` is the log file path
    */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println("Usage: CountJPGs <file>")
      System.exit(1)
    }

    // Configuration (master, app name) is expected to be provided externally,
    // e.g. by spark-submit, since the no-arg constructor is used.
    val sc = new SparkContext()
    try {
      val logFile = args(0)
      val jpgRequests = sc.textFile(logFile)
        .map(_.split(' '))
        // Skip lines with too few fields instead of failing the whole job
        // with an ArrayIndexOutOfBoundsException.
        .filter(_.length > RequestFieldIndex)
        .map(fields => fields(RequestFieldIndex))
        .filter(_.contains(".jpg"))

      val jpgCount = jpgRequests.count()
      println(s"JPG Count : $jpgCount")
    } finally {
      // Always release the context, even when the job throws.
      sc.stop()
    }
    // Return normally so the process reports success; the previous
    // System.exit(1) signalled failure even after a successful run.
  }
}
进入 /home/training/training_materials/dev1/exercises/spark-application/countjpgs 文件夹下,对该项目