Data format (sample input for wordcount.txt):
撒地方 213
萨芬 123
657 jhkjhk
萨芬 123
gjh 123
意图以 2354
萨芬 123
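With this input, the job below splits each line on single spaces and counts every token, so the expected output contains pairs such as (撒地方,1), (萨芬,3), and (123,4); the most frequent token is 123, which appears 4 times.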
package com.agm.words

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.log4j.{Level, Logger}
import java.io._ // only needed by the commented-out winutils workaround below

object wordcount {
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO logging.
    Logger.getLogger("org").setLevel(Level.ERROR)

    /*
    // Windows-only workaround: point hadoop.home.dir at the working
    // directory and create an empty winutils.exe so Hadoop does not
    // fail on a missing HADOOP_HOME.
    val path = new File(".").getCanonicalPath()
    System.getProperties().put("hadoop.home.dir", path)
    new File("./bin").mkdirs()
    new File("./bin/winutils.exe").createNewFile()
    */

    // val logFile = "D:\\Spark\\spark-2.0.1-bin-hadoop2.6\\test.txt" // local file path
    val logFile = "F:\\testData\\spark\\wordcount.txt" // local file path

    val conf = new SparkConf().setAppName("Simple Application") // name the application
    conf.setMaster("local")
    val sc = new SparkContext(conf) // create the SparkContext
    // val sqlContext = new SQLContext(sc)

    val logData = sc.textFile(logFile).cache()

    // Split each line on spaces, emit (word, 1) pairs, and sum the counts per word.
    val res = logData.flatMap(f => f.split(" ")).map(f => (f, 1)).reduceByKey((a, b) => a + b)
    res.foreach(println)

    // max() with the default tuple ordering compares by word, not by count;
    // pass an Ordering on the count to get the most frequent word.
    println("max: " + res.max()(Ordering.by[(String, Int), Int](_._2)))

    // Equivalent: reduce the pairs, keeping the one with the larger count.
    val max = res.reduce((a, b) => if (a._2 > b._2) a else b)
    println(max)

    sc.stop()
  }
}
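The unused org.apache.spark.sql import and the commented-out SQLContext suggest the SQL API was also being considered. Below is a minimal sketch of the same word count with the Dataset API, assuming Spark 2.x, where SparkSession replaced SQLContext as the entry point; the object name WordCountDS and the reuse of the same file path are illustrative choices, not part of the original.

package com.agm.words

import org.apache.spark.sql.SparkSession

object WordCountDS {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("Simple Application")
      .master("local")
      .getOrCreate()
    import spark.implicits._ // encoders for Dataset[String] and tuples

    // Read the file as a Dataset[String], split into words, count per word.
    val counts = spark.read.textFile("F:\\testData\\spark\\wordcount.txt")
      .flatMap(_.split(" "))
      .groupByKey(w => w)
      .count()

    counts.show()
    spark.stop()
  }
}

To run either version outside the IDE, package the project into a jar and submit it, e.g. spark-submit --class com.agm.words.wordcount --master local <your-jar>.jar, where the jar name depends on your build setup.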