1.基于jdk1.8创建一个maven项目
2.增加scala-sdk
Project Structure - Platform Settings - Global Libraries
因为在学习中使用的spark版本,需要适配scala2.12.x版本(该文中用到的是scala_2.12.11)
3.增加Spark的Maven依赖
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.0.0</version>
</dependency>
</dependencies>
4.WordCount试运行
package com.atguigu.bigdta.spark.core
import org.apache.spark.api.java.JavaSparkContext.fromSparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object Test01 {

  /** Word-count demo: reads every text file under the local "datas" directory,
   *  splits lines on single spaces, counts occurrences of each word, and
   *  prints the (word, count) pairs to stdout.
   */
  def main(args: Array[String]): Unit = {
    // Run Spark in local mode for this tutorial example.
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    // One RDD element per line of input.
    val lines: RDD[String] = sc.textFile("datas")
    val words: RDD[String] = lines.flatMap(_.split(" "))

    // Group identical words, then count each group's size.
    // NOTE: groupBy shuffles the full word iterables; reduceByKey would be
    // more efficient, but groupBy is kept here for the tutorial's purposes.
    val wordGroup: RDD[(String, Iterable[String])] = words.groupBy(word => word)
    val wordToCount: RDD[(String, Int)] = wordGroup.map {
      case (word, list) => (word, list.size)
    }

    // Trigger the computation and pull results to the driver.
    val array: Array[(String, Int)] = wordToCount.collect()
    array.foreach(println)

    //TODO 关闭连接 (shut down the Spark connection)
    // Fix: SparkContext's shutdown method is stop(), not close();
    // the original sc.close() only compiled via the implicit
    // JavaSparkContext conversion imported above.
    sc.stop()
  }
}