Maven 项目,环境版本配置如下:
<scala.version>2.11.11</scala.version>
<spark.version>2.1.1</spark.version>
<hadoop.version>2.8.0</hadoop.version>
---------------------------------------------------
需在 Windows 下载 Hadoop(本地运行通常还需 winutils.exe,待确认)并配置 HADOOP_HOME 环境变量;代码中也通过 hadoop.home.dir 属性指向本地 Hadoop 安装目录
----------------------------------------------------
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Word count over a local text file.
 *
 * Reads the input file, splits each line on single spaces, counts the
 * occurrences of every word, sorts the (word, count) pairs by count in
 * ascending order (sortBy's default), and writes the result as text
 * files to the output directory.
 *
 * Fixes vs. original: the missing closing brace of the object (the
 * original did not compile), and `sc.stop()` moved into `finally` so
 * the SparkContext is released even when a stage fails.
 */
object Test1 {
  def main(args: Array[String]): Unit = {
    // Point Hadoop at a local install so the Windows native helpers
    // (winutils) can be located — required to run Spark locally on Windows.
    System.setProperty("hadoop.home.dir", "D:\\MySorfware\\hadoop-2.8.0")

    val conf = new SparkConf().setAppName("Test1").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // One RDD element per input line.
      val lines: RDD[String] = sc.textFile("D:\\MySorfware\\MyTestFile\\test1\\11.txt")
      // Split lines into words on single spaces.
      val words: RDD[String] = lines.flatMap(_.split(" "))
      // Pair each word with an initial count of 1.
      val wordAndOne: RDD[(String, Int)] = words.map((_, 1))
      // Sum the 1s per distinct word.
      val reduced: RDD[(String, Int)] = wordAndOne.reduceByKey(_ + _)
      // Sort by count, ascending (matches the original behavior).
      val sorted: RDD[(String, Int)] = reduced.sortBy(_._2)
      // NOTE: saveAsTextFile throws if the output directory already exists.
      sorted.saveAsTextFile("D:\\MySorfware\\MyTestFile\\test2\\")
    } finally {
      // Always release the SparkContext, even if a stage above failed.
      sc.stop()
    }
  }
}