package com.shujia.core
import com.shujia.core.Demo10Join.Student
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
object Demo16Action {
  /**
   * Demonstrates the common RDD action operators:
   * foreach, take, collect, count, reduce (and, not shown here, the save* family).
   *
   * Actions trigger actual job execution; everything before them is lazy.
   */
  def main(args: Array[String]): Unit = {
    // Build the local SparkContext.
    val conf: SparkConf = new SparkConf()
    conf.setAppName("Demo16Action")
    conf.setMaster("local")
    val sc: SparkContext = new SparkContext(conf)
    try {
      // Parse each CSV line of students.txt into a Student case class.
      // NOTE(review): assumes every line has at least 5 comma-separated fields
      // and that field 2 is a valid integer — confirm against the data file.
      val stuRDD: RDD[Student] = sc
        .textFile("Spark/data/students.txt")
        .map(line => {
          val splits: Array[String] = line.split(",")
          val id: String = splits(0)
          val name: String = splits(1)
          val age: Int = splits(2).toInt
          val gender: String = splits(3)
          val clazz: String = splits(4)
          Student(id, name, age, gender, clazz)
        })
      // foreach: applies a side-effecting function to every record; returns Unit.
      // stuRDD.foreach(println)
      // take(N): fetches the first N records to the driver as an Array.
      stuRDD
        .take(10)
        .foreach(println) // this foreach is Array's method, executed on the driver
      // collect: pulls ALL records to the driver as an Array — beware of large RDDs.
      stuRDD
        .collect()
      // .foreach(println) // this foreach is Array's method
      // count: number of records in the RDD.
      val cnt: Long = stuRDD
        .count()
      println(cnt)
      // reduce: global aggregation, treating the whole RDD as a single group.
      // Equivalent SQL: select sum(age) from student
      val sumAge: Int = stuRDD
        .map(stu => stu.age)
        .reduce(_ + _)
      println(sumAge)
    } finally {
      // Always release the SparkContext, even if an action fails.
      sc.stop()
    }
  }
}
Student(1500100001,施笑槐,22,女,文科六班)
Student(1500100002,吕金鹏,24,男,文科六班)
Student(1500100003,单乐蕊,22,女,理科六班)
Student(1500100004,葛德曜,24,男,理科三班)
Student(1500100005,宣谷芹,22,女,理科五班)
Student(1500100006,边昂雄,21,男,理科二班)
Student(1500100007,尚孤风,23,女,文科六班)
Student(1500100008,符半双,22,女,理科六班)
Student(1500100009,沈德昌,21,男,理科一班)
Student(1500100010,羿彦昌,23,男,理科六班)
1000
22521
package com.shujia.core
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import scala.util.Random
object Demo17Pi {
  /**
   * Estimates Pi with the Monte Carlo method: sample uniform points in the
   * square [-1,1] x [-1,1]; the fraction falling inside the unit circle
   * approaches Pi/4, so Pi ≈ 4 * inside / total.
   */
  def main(args: Array[String]): Unit = {
    // Warm-up: generate `total` random points in (-1, 1) with a plain loop.
    var cnt: Int = 1
    val total: Int = 100
    while (cnt <= total) {
      // nextDouble() is in [0,1); *2-1 maps it to [-1,1).
      val x: Double = Random.nextDouble() * 2 - 1
      val y: Double = Random.nextDouble() * 2 - 1
      // println((x,y))
      cnt += 1
    }
    // Range demos: [0,10] inclusive
    for (i <- 0 to 10) {
      // println(i)
    }
    // [0,10) exclusive
    for (i <- 0 until 10) {
      // println(i)
    }
    // [0,10) via Range(start, end, step)
    for (i <- Range(0, 10, 1)) {
      // println(i)
    }
    val range: Range = 0 until 10
    // Build an RDD from an in-memory sequence.
    val conf: SparkConf = new SparkConf()
    conf.setAppName("Demo017Pi")
    conf.setMaster("local[*]")
    val sc: SparkContext = new SparkContext(conf)
    try {
      val pointCnt: Int = 100000
      val mapTaskNums: Int = 10
      // mapTaskNums * pointCnt points total, spread over mapTaskNums partitions.
      val pointRDD: RDD[(Double, Double)] = sc.parallelize(1 to mapTaskNums * pointCnt, mapTaskNums)
        .map(i => {
          // Uniform sample in [-1,1) x [-1,1).
          val x: Double = Random.nextDouble() * 2 - 1
          val y: Double = Random.nextDouble() * 2 - 1
          (x, y)
        })
      // Count points inside vs. outside the unit circle.
      // cache(): this RDD is consumed by two actions (foreach + collect);
      // without caching the whole lineage would be recomputed for each.
      val pointCntRDD: RDD[(String, Int)] = pointRDD
        .map {
          case (x: Double, y: Double) =>
            if (x * x + y * y <= 1) {
              ("圆内", 1)
            } else {
              ("圆外", 1)
            }
        }.coalesce(2).reduceByKey(_ + _)
        .cache()
      pointCntRDD.foreach(println)
      // headOption guards against an empty result instead of the unsafe (0) index.
      val pointInCycleCnt: Int = pointCntRDD
        .filter(kv => "圆内".equals(kv._1))
        .collect()
        .headOption
        .map(_._2)
        .getOrElse(0)
      println(s"圆内的点的数量:$pointInCycleCnt")
      // Pi ≈ 4 * (points inside circle) / (total points).
      val Pi: Double = pointInCycleCnt.toDouble * 4 / (mapTaskNums * pointCnt)
      println(s"Pi的值为:$Pi")
    } finally {
      // Always release the SparkContext.
      sc.stop()
    }
  }
}
(圆外,214784)
(圆内,785216)
圆内的点的数量:785216
Pi的值为:3.140864