<dependencies>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>2.11.12</version>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-compiler</artifactId>
        <version>2.11.12</version>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-reflect</artifactId>
        <version>2.11.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.4.5</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Scala compiler plugin -->
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.15.2</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
package com.shujia.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Demo01WordCount {
  def main(args: Array[String]): Unit = {
    // Initialize the Spark environment
    // Create the Spark configuration object
    val conf: SparkConf = new SparkConf()
    // Set the application name
    conf.setAppName("Demo01WordCount")
    // Set the run mode; "local" means run locally
    conf.setMaster("local")

    // 1. Create the Spark context, i.e. the entry point to Spark
    val sc: SparkContext = new SparkContext(conf)

    // 2. Read the file and build an RDD from it
    /**
     * RDD: Resilient Distributed Dataset.
     * For now, think of it as a Scala collection; it is used much like a List.
     *
     * Spark does not implement its own file-reading logic. It reuses MapReduce's
     * input splitting and record parsing, based on the TextInputFormat class.
     */
    val linesRDD: RDD[String] = sc.textFile("Spark/data/words.txt")

    linesRDD.foreach(println)

    // Stop the SparkContext to release resources
    sc.stop()
  }
}
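The demo above stops after printing the raw lines. As the object name Demo01WordCount suggests, the usual next steps are to split each line into words, map each word to a count of 1, and sum the counts with reduceByKey. Below is a minimal sketch of those steps under the same setup; the object name Demo01WordCountFull is illustrative and not part of the original code.

package com.shujia.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Demo01WordCountFull {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setAppName("Demo01WordCountFull")
      .setMaster("local")
    val sc: SparkContext = new SparkContext(conf)

    val linesRDD: RDD[String] = sc.textFile("Spark/data/words.txt")

    // Split each comma-separated line into individual words
    val wordsRDD: RDD[String] = linesRDD.flatMap(line => line.split(","))

    // Pair each word with 1, then sum the counts per word
    val countsRDD: RDD[(String, Int)] = wordsRDD
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    countsRDD.foreach(println)

    sc.stop()
  }
}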
Contents of the input file Spark/data/words.txt:

java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
java,spark,java,hadoop
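Each of the ten lines contains java twice and spark and hadoop once, so running the word-count sketch against this file should print (java,20), (spark,10), and (hadoop,10). The order of the printed pairs is not guaranteed, since the counts come from an unordered RDD.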