一、准备工作
-
scala-sdk安装
下载地址:scala-2.11.2,下载完成后配置SCALA_HOME -
maven坐标
<properties> <spark.version>2.4.3</spark.version> <scala.version>2.11.2</scala.version> </properties> <!-- 导入spark的依赖 --> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_2.11</artifactId> <version>${spark.version}</version> </dependency> <!-- 导入spark-sql的依赖 --> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-sql_2.11</artifactId> <version>${spark.version}</version> </dependency> <!-- 导入spark插件的依赖 --> <build> <plugins> <plugin> <groupId>org.scala-tools</groupId> <artifactId>maven-scala-plugin</artifactId> <version>2.15.1</version> <executions> <execution> <goals> <goal>compile</goal> <goal>testCompile</goal> </goals> </execution> </executions> <!--<configuration> <recompileMode>incremental</recompileMode> <scalaVersion>${scala.version}</scalaVersion> <launchers> <launcher> <id>app</id> <mainClass>com.staryea.scala.ScalaApplication</mainClass> <args> <arg>-deprecation</arg> </args> <jvmArgs> <jvmArg>-Xms256m</jvmArg> <jvmArg>-Xmx2048m</jvmArg> </jvmArgs> </launcher> </launchers> </configuration>--> </plugin> </plugins> </build>
-
Idea插件下载scala
以上版本亲测可用
二、具体实例
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
* @author: hs
* @Date: 2019/11/5 11:31
* @Description:
*/
object SparkTest {
  /**
   * Word-count example: reads `in/word.txt`, counts occurrences of each
   * space-separated word, and prints the counts of every word that
   * appears more than once.
   *
   * Fix: the original never called `sc.stop()`, so the SparkContext (and
   * its backing resources) leaked; it is now shut down in a `finally`
   * block so cleanup happens even if the job throws.
   */
  def main(args: Array[String]): Unit = {
    // Configure a local-mode run using all available cores.
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")
    // Entry point to the RDD API.
    val sc: SparkContext = new SparkContext(conf)
    try {
      val lines: RDD[String] = sc.textFile("in/word.txt")
      // Split each line on single spaces into individual words.
      val word: RDD[String] = lines.flatMap(_.split(" "))
      // Pair every word with 1, then sum the 1s per distinct word.
      val one: RDD[(String, Int)] = word.map((_, 1))
      val sum: RDD[(String, Int)] = one.reduceByKey(_ + _)
      // Collect to the driver, keep words seen more than once, print their counts.
      sum.collect().filter(_._2 > 1).map(_._2).foreach(println)
    } finally {
      sc.stop() // release the context even when the job fails
    }
  }
}