新建maven工程项目
添加pom.xml依赖
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.0.0</version>
</dependency>
新建 Scala object -> SparkWordCount(与下方代码中的对象名保持一致)
package com.demo.bigdata.spark.core.WordCount
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Word-count example built on the Spark RDD API: reads a text file,
 * splits each line into words, and prints each word with its count.
 *
 * USER: ZhuYuYing
 * DATE: 2021/3/24
 */
object SparkWordCount {

  def main(args: Array[String]): Unit = {
    // 1. Establish the connection to the Spark framework
    //    (conceptually similar to opening a JDBC connection).
    //    "local" runs Spark in-process with a single worker thread.
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    try {
      // Input path may be supplied as the first command-line argument;
      // falls back to the original hard-coded sample file for compatibility.
      val inputPath =
        if (args.nonEmpty) args(0)
        else "C:\\Users\\ZhuYuYing\\IdeaProjects\\spark-study\\datas\\test02.txt"

      // 2.1 Read the file, one element per line, e.g. "hello world".
      val lines: RDD[String] = sc.textFile(inputPath)

      // 2.2 Split each line on spaces; flatMap flattens the per-line
      //     arrays into a single RDD of words.
      //     "hello world" => "hello", "world"
      val words: RDD[String] = lines.flatMap(_.split(" "))

      // 2.3/2.4 Count occurrences with reduceByKey, which pre-aggregates
      //     within each partition before shuffling — unlike groupBy, which
      //     would move every individual word occurrence across the network.
      //     ("hello", 3)("world", 2)
      val wordCounts: RDD[(String, Int)] = words.map((_, 1)).reduceByKey(_ + _)

      // 2.5 Collect the results to the driver and print them.
      val result: Array[(String, Int)] = wordCounts.collect()
      result.foreach(println)
    } finally {
      // 3. Always release the connection, even if the job fails.
      sc.stop()
    }
  }
}
模拟数据:在项目根目录下新建 datas 目录(注意这是普通目录,不是 package),再在其中新建 txt 文件
hello world
hello spark
hello world
hello spark
hello world
hello spark
hello world
hello spark
文件一览
点击运行即可在控制台打印计算结果