原始数据:
实现代码:
object avgAge {
  /**
   * Spark driver: loads a CSV file, projects columns 0, 18 and 31 of every
   * record into a tuple, and writes the tuples out as a text file.
   *
   * The output directory name is the prefix "avgAge_results" followed by the
   * current epoch-millisecond timestamp, so repeated runs do not collide.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val outputPrefix = "avgAge_results"
    val conf = new SparkConf().setMaster("local[*]").setAppName("MyFirstSpark")
    val sc = new SparkContext(conf)
    try {
      // `transfer` is defined outside this snippet; presumably it loads the
      // file as an RDD of text lines (possibly re-encoding it) -- TODO confirm.
      val textFile = transfer(sc, "/home/saprk/data/GDYKT/2016.1-2.csv")

      // Highest column index read below; shorter rows cannot be projected.
      val lastColumn = 31

      val selectedColumns = textFile
        // NOTE(review): every line is first broken on single spaces, so a
        // field containing a space is split across records -- confirm that
        // this is intended for the input data.
        .flatMap(line => line.split(" "))
        // Split each record once. The -1 limit keeps trailing empty columns,
        // which a plain split(",") would silently drop.
        .map(record => record.split(",", -1))
        // Skip blank or short rows instead of aborting the whole job with an
        // ArrayIndexOutOfBoundsException.
        .filter(fields => fields.length > lastColumn)
        // Pick the relevant columns.
        .map(fields => (fields(0), fields(18), fields(lastColumn)))

      selectedColumns.saveAsTextFile(s"$outputPrefix${System.currentTimeMillis()}")
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}
运行结果: