一、SparkUtils工具类
import org.apache.spark.{SparkConf, SparkContext}
object SparkUtils {

  /** Default master URL: run locally using all available cores. */
  val DEFAULT_MASTER = "local[*]"

  /**
   * Creates a SparkContext with the default master (`local[*]`).
   *
   * @param appName application name shown in the Spark UI
   * @return a freshly created SparkContext
   */
  def getSparkContext(appName: String): SparkContext = getSparkContext(appName, DEFAULT_MASTER)

  /**
   * Creates a SparkContext for the given application name and master URL.
   *
   * @param appName application name shown in the Spark UI
   * @param master  Spark master URL (e.g. "local[*]")
   * @return a freshly created SparkContext
   */
  def getSparkContext(appName: String, master: String): SparkContext =
    new SparkContext(new SparkConf().setAppName(appName).setMaster(master))

  /**
   * Stops the given SparkContext; a null reference is silently ignored.
   *
   * @param sc the context to stop (may be null)
   */
  def close(sc: SparkContext): Unit = if (sc != null) sc.stop()
}
二、日志工具
import org.apache.log4j.{Level, Logger}

/**
 * Mix-in that quiets noisy framework logging as soon as the host
 * object/class is initialized: the listed loggers are raised to WARN.
 */
trait LoggerTrait {
  private val noisyLoggers =
    Seq("org.apache.spark", "org.apache.hadoop", "org.spark_project")

  noisyLoggers.foreach { name =>
    Logger.getLogger(name).setLevel(Level.WARN)
  }
}
三、Spark算子GroupByKey
import cn.qphone.spark.common.LoggerTrait.LoggerTrait
import cn.qphone.spark.common.Utils.SparkUtils
import org.apache.spark.rdd.RDD
/**
 * Demo of the `groupByKey` operator: groups student records by their
 * school/sect name and prints each group.
 */
object Deom8_groupByKey extends LoggerTrait {

  def main(args: Array[String]): Unit = {
    // 1. Acquire a local SparkContext via the shared utility.
    val sc = SparkUtils.getSparkContext("Deom8_groupByKey")

    // 2. Sample data, whitespace-separated: id, name, age, school.
    val students = List(
      "1 令狐冲 22 华山",
      "2 岳不群 38 华山",
      "3 虚竹 33 逍遥",
      "4 乔峰 40 丐帮",
      "5 黄蓉 33 桃花岛",
      "6 杨过 11 古墓",
      "7 小龙女 13 古墓",
      "8 郭靖 34 丐帮"
    )
    val rawRDD: RDD[String] = sc.parallelize(students)

    // 3. Key each record by its school; the value keeps (id, name, age).
    val keyedRDD: RDD[(String, (String, String, String))] = rawRDD.map { record =>
      val fields = record.split("\\s+")
      (fields(3), (fields(0), fields(1), fields(2)))
    }

    // 4. Collect all students of the same school into one group and print.
    val grouped: RDD[(String, Iterable[(String, String, String)])] = keyedRDD.groupByKey()
    grouped.foreach(println)

    // 5. Release Spark resources.
    SparkUtils.close(sc)
  }
}