SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object SparkContextDemo extends App {
  // Create a SparkContext
  val conf: SparkConf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("sparkTest")
  val sc: SparkContext = SparkContext.getOrCreate(conf)

  // Word count: read from HDFS, split lines on spaces, count each word
  sc.textFile("hdfs://hadoop1:9000/data/text.txt")
    .flatMap(x => x.split(" "))
    .map(x => (x, 1))
    .reduceByKey(_ + _)
    .collect()
    .foreach(println)

  //  // Default partition count follows the number of local threads
  //  val rdd1: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9))
  //  println(rdd1.partitions.size)
  //  // Explicitly set the partition count
  //  val rdd2: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 6), 5)
  //  println(rdd2.partitions.size)
  //  // RDDs created with makeRDD also default to the thread count
  //  val rdd3: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4, 5))
  //  println(rdd3.partitions.size)

  sc.stop()
}
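To keep the result instead of only printing it on the driver, the same pair RDD can be written back to HDFS with saveAsTextFile. The sketch below is a minimal variant of the word count above; the output directory hdfs://hadoop1:9000/output/wordcount is a hypothetical path and must not already exist, since saveAsTextFile fails on an existing directory.

import org.apache.spark.{SparkConf, SparkContext}

object SparkContextSaveDemo extends App {
  val conf: SparkConf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("sparkSave")
  val sc: SparkContext = SparkContext.getOrCreate(conf)

  sc.textFile("hdfs://hadoop1:9000/data/text.txt")
    .flatMap(line => line.split(" "))
    .map(word => (word, 1))
    .reduceByKey(_ + _)
    // Hypothetical output path: one part-* file is written per partition
    .saveAsTextFile("hdfs://hadoop1:9000/output/wordcount")

  sc.stop()
}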
SparkSession
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession

object SparkSessionDemo extends App {
  // Build a SparkSession and get its underlying SparkContext
  val spark: SparkSession = SparkSession.builder()
    .master("local[2]")
    .appName("sparkSession")
    .getOrCreate()
  val sc: SparkContext = spark.sparkContext

  // Same word count as above, driven through the session's SparkContext
  sc.textFile("hdfs://hadoop1:9000/data/text.txt")
    .flatMap(x => x.split(" "))
    .map(x => (x, 1))
    .reduceByKey(_ + _)
    .collect()
    .foreach(println)

  spark.stop() // stops the session and its underlying SparkContext
}
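SparkSession can also run the same word count without dropping down to the RDD API. The following is a minimal sketch using the Dataset API; spark.read.textFile returns a Dataset[String] whose single column is named value, which is what the groupBy below relies on.

import org.apache.spark.sql.SparkSession

object SparkSessionDatasetDemo extends App {
  val spark: SparkSession = SparkSession.builder()
    .master("local[2]")
    .appName("sparkSessionDataset")
    .getOrCreate()
  import spark.implicits._ // encoders needed by flatMap on a Dataset

  spark.read.textFile("hdfs://hadoop1:9000/data/text.txt")
    .flatMap(_.split(" "))  // Dataset[String], one row per word
    .groupBy("value")       // "value" is the default column name
    .count()
    .show()

  spark.stop()
}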