IDEA软件scala版本2.12.11
pom.xml
<!-- Maven dependencies for the Spark example below: the Scala 2.12 library,
     compiler and reflection artifacts, log4j 1.x, and Spark core built for
     Scala 2.12. -->
<dependencies>
<!-- Scala standard library.
     NOTE(review): pinned to 2.12.4, while the note preceding this snippet
     mentions Scala 2.12.11 in the IDE; confirm which 2.12.x version is
     intended and keep the three org.scala-lang artifacts on the same one. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.12.4</version>
</dependency>
<!-- Scala compiler artifact; same version as scala-library above. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>2.12.4</version>
</dependency>
<!-- Scala reflection artifact; same version as scala-library above. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>2.12.4</version>
</dependency>
<!-- log4j 1.x logging library. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.12</version>
</dependency>
<!-- Spark core; the _2.12 suffix is the Scala binary version and must match
     the org.scala-lang artifacts above (2.12.x). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.0.0</version>
</dependency>
</dependencies>
<!-- Build section: registers a Scala compiler plugin so that Maven compiles
     Scala sources. -->
<build>
<plugins>
<!-- NOTE(review): org.scala-tools:maven-scala-plugin appears to be the older
     plugin line; its successor is published as
     net.alchim31.maven:scala-maven-plugin. Verify before relying on this
     with current Scala/Java toolchains. -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<!-- Run the plugin's compile and testCompile goals as part of the build. -->
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
代码:
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark exercise: reads comma-separated rows from `data/a.txt` and prints
 * several per-student score statistics.
 *
 * Every input line is split on "," and its first four fields are used:
 * field 0 is the student key and fields 1-3 are parsed as integer scores
 * (field 1 is the score ranked in the "math first place" section).
 */
object Test01 {
  // Needed to view the accumulator's java.util.List as a Scala collection.
  import scala.collection.JavaConverters._

  /** One parsed input line: (student, score1, score2, score3), still as strings. */
  private type Row = (String, String, String, String)

  /**
   * Sum of the three score fields of a row.
   *
   * @throws NumberFormatException if any score field is not a valid integer
   */
  private def totalOf(row: Row): Int = row._2.toInt + row._3.toInt + row._4.toInt

  /**
   * Runs all report sections against `data/a.txt` on a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("one").setMaster("local[*]")
    val context = new SparkContext(conf)
    try {
      val iterator = context.textFile("data/a.txt")
      // The parsed rows feed five separate actions below; cache them so the
      // file is not re-read and re-split for each one.
      val list = iterator.map { x =>
        val str = x.split(",")
        (str(0), str(1), str(2), str(3))
      }.cache()

      println("求每个学生的总成绩")
      list.map(x => (x._1, totalOf(x))).foreach(println)

      println("求每个学生的平均成绩")
      // Divide by 3.0: integer division would silently truncate the average.
      list.map(x => (x._1, totalOf(x) / 3.0)).foreach(println)

      println("求数学第一名的学生的各门成绩")
      // RDD.take(1) fetches only the first element of the sorted RDD instead
      // of collecting every row to the driver first.
      list.map(x => (x._1, x._2.toInt, x._3, x._4))
        .sortBy(_._2, ascending = false)
        .take(1)
        .foreach(println)

      println("求总分第一名的学生的各们成绩")
      list.map(x => (x._1, x._2, x._3, x._4, totalOf(x)))
        .sortBy(_._5, ascending = false)
        .take(1)
        .foreach(println)

      println("使用累加器求每个学生的总成绩,不使用累加器不得分")
      // Each task adds (student, row total) to the accumulator; `foreach` is
      // an action, so the updates are merged back to the driver when it ends.
      val leijia = context.collectionAccumulator[(String, Int)]("leijia")
      list.foreach(x => leijia.add((x._1, totalOf(x))))
      // On the driver: combine entries per student (a student may appear on
      // more than one line) and print one (student, total) pair each.
      leijia.value.asScala
        .groupBy(_._1)
        .map { case (student, entries) => (student, entries.map(_._2).sum) }
        .foreach(println)
    } finally {
      // Always release Spark resources, even if a section above fails.
      context.stop()
    }
  }
}