spark scala读取文件选取特定列
wordcount.txt:
a,1
c,2
a,1
选取最后一列,并统计该列中每个值出现的次数:
// Count how often each value occurs in the last comma-separated column of the file.
// `sc` is the SparkContext supplied by the surrounding session (e.g. spark-shell).
// To key on a different column, index the split array instead, e.g. the first column:
//   line.split(",")(0)
// To select several columns at once, split once and build a tuple:
//   .map { line => val cols = line.split(","); (cols(0), cols(18), cols(31)) }
val data = sc.textFile("file:///E://table//wordcount.txt") // textFile already yields one record per line
  .map { line =>
    // Last field of the line; same element as reverse(0) but without copying the array.
    val lastColumn = line.split(",").last
    (lastColumn, 1)
  }
  .reduceByKey(_ + _) // sum the 1s per distinct column value
  .collect()

// Print each (value, count) pair on the driver.
data.foreach(println)