Completely Conquering Scala Pattern Matching and the Type System
Stage 1: Spark Streaming, Spark SQL, Kafka, and Spark core internals (experience with at least one large project is required);
Stage 2: the various environments Spark runs in, resolving all kinds of failures, and performance tuning (mastery of Spark internals and how Spark runs);
Stage 3: stream processing and machine learning take center stage, which requires first mastering the previous two stages;
Follow teacher Wang Jialin's from-zero explanations, focus on hands-on practice, become a Spark expert, and stand out in the big data world!
Part 1: Study Notes
//Scala pattern matching is similar to Java's switch/case
//Java's switch/case only matches on values
//Scala pattern matching is much more powerful: it can match not only values but also types and collections (elements of a Map, List, etc.)
//Matching on values
def bigData(data: String): Unit = {
  data match {
    case "Spark" => println("Wow!!!") // no break needed
    case "Hadoop" => println("Ok")
    case _ => println("Something others") // default case: everything not matched above
  }
}
//bigData("Hadoop")
//bigData("Flink")
def bigData2(data: String): Unit = {
  data match {
    case "Spark" => println("Wow!!!") // no break needed
    case "Hadoop" => println("Ok")
    case _ if data == "Flink" => println("Cool") // a guard adds a boolean condition to the case
    case data_ if data_ == "Java" => println("haha" + " : " + data_) // data_ binds the value passed into the pattern
    case _ => println("Something others")
  }
}
//bigData2("Spark")
//bigData2("Flin")
//bigData2("Flink")
//bigData2("Java")
//Matching on types
import java.io.FileNotFoundException
def exception(e: Exception): Unit = {
  e match {
    case fileException: FileNotFoundException => println("File not found : " + fileException)
    case _: Exception => println("Exception getting thread dump from executor : " + e)
  }
}
//exception(new FileNotFoundException("oops"))
//Pattern matching on Scala collections (the same works for Set, Map, etc.)
def data(array: Array[String]): Unit = {
  array match {
    case Array("Scala") => println("Scala") // exactly one element equal to "Scala"
    case Array(spark, hadoop, flink) => println(spark + " : " + hadoop + " : " + flink) // exactly three elements, bound to variables
    case Array("Spark", _*) => println("Spark...") // starts with "Spark", followed by any number of elements
    case _ => println("Unknown")
  }
}
data(Array("Scala"))
data(Array("Spark","Hadoop","Flink"))
data(Array("Spark"))
//Pattern matching with case classes
//1. A case class is roughly the equivalent of a Java bean
//2. Instantiation does not require `new`
//case class Person(name: String) // constructor parameters automatically become vals; a companion object with an apply method is generated
// a case class may not extend another case class, so Person is a plain class here; Worker and Student both extend it
class Person
case class Worker(name: String, salary: Double) extends Person
case class Student(name: String, score: Double) extends Person
def sayHi(person: Person): Unit = {
  person match {
    case Student(name, score) => println(name + " : " + score)
    case Worker(name, salary) => println(name + " : " + salary)
    case _ => println("Unknown")
  }
}
sayHi(Student("Spark", 6.6))
def sayHi2(person: Person): Unit = {
  person match {
    case Student(name, score) => println("I am student : " + name + " : " + score)
    case Worker(name, salary) => println("I am worker : " + name + " : " + salary)
    case _ => println("Unknown")
  }
}
sayHi2(Student("Spark", 6.6))
//Pattern matching with Some and None (Option)
/*private val restServer =
  if (restServerEnabled) {
    val port = conf.getInt("spark.master.rest.port", 6066)
    Some(new StandaloneRestServer(host, port, conf, self, masterUrl))
  } else {
    None
  }*/
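// The Spark source excerpt above stores the REST server in an Option. A minimal sketch of how
// such an Option value is typically consumed with pattern matching (describe and the port
// values are illustrative, not taken from the Spark source):
def describe(maybePort: Option[Int]): String = maybePort match {
  case Some(port) => "REST server listening on port " + port // a value is present
  case None => "REST server disabled" // no value
}
println(describe(Some(6066))) // REST server listening on port 6066
println(describe(None)) // REST server disabled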
/**
 * Type parameters
 * 1. Generics
 */
//Generic classes and generic functions
class Person1[T](val content: T) {
  def getContent(id: T) = id + " _ " + content
}
val p = new Person1[String]("Spark")
println(p.getContent("Scala"))
//Upper bound: codec: Class[_ <: CompressionCodec] restricts the type to subclasses of CompressionCodec (bounds express parent/child relationships between types)
//Lower bound: codec: Class[_ >: CompressionCodec] restricts the type to superclasses of CompressionCodec
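// A minimal sketch of upper and lower type bounds; Animal, Dog and the two methods are invented
// names for illustration, not from the Spark source quoted below:
class Animal { def name = "animal" }
class Dog extends Animal { override def name = "dog" }
def describeAnimal[T <: Animal](a: T): String = a.name // upper bound: T must be Animal or a subclass
def prependDog[T >: Dog](head: T, tail: List[T]): List[T] = head :: tail // lower bound: T must be Dog or a superclass
println(describeAnimal(new Dog)) // dog
val animals: List[Animal] = prependDog(new Animal, List(new Dog)) // T is inferred as Animal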
//View bounds allow an implicit conversion of the type (in addition to the upper/lower bound relationships above)
//View bound syntax: <% means the type can be implicitly converted to the bound type
//implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag](
//Writable: ClassTag
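// A minimal sketch of a view bound; asUpper and the Int => String conversion are invented for
// illustration (the <% syntax is deprecated in newer Scala versions in favour of an explicit
// implicit-conversion parameter):
import scala.language.implicitConversions
def asUpper[T <% String](value: T): String = value.toUpperCase // T <% String: any T that can be implicitly converted to String
implicit def intToString(i: Int): String = i.toString // the implicit view used for Int
println(asUpper("spark")) // SPARK, String needs no conversion
println(asUpper(42)) // 42, converted via intToString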
//Context bound: the syntax [T : SomeType]
//requires an implicit value of type SomeType[T]
//the implicit value is injected from the surrounding context, and the injection happens automatically
class Compare[T: Ordering](val n1: T, val n2: T) {
  def bigger(implicit ordered: Ordering[T]) = if (ordered.compare(n1, n2) > 0) n1 else n2
}
println(new Compare[Int](8,3).bigger)
println(new Compare[String]("Spark","Hadoop").bigger)
//Ordering[String] and Ordering[Int] are the implicit instances the compiler injects
//for the context bound when calling new Compare[String]("Spark", "Hadoop").bigger
//Manifest context bounds // needed to create generic arrays
//[T : Manifest]
//Array[T]
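// A minimal sketch of a Manifest context bound used to build a generic array; buildArray is an
// invented helper name (Manifest has since been superseded by ClassTag, as noted below):
def buildArray[T: Manifest](elems: T*): Array[T] = {
  val arr = new Array[T](elems.length) // instantiating Array[T] needs the runtime type information from the Manifest
  elems.copyToArray(arr)
  arr
}
println(buildArray(1, 2, 3).mkString(", ")) // 1, 2, 3
println(buildArray("Spark", "Hadoop").mkString(", ")) // Spark, Hadoop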
/**
 * Variance: class Person[+T] declares T as covariant, so a subtype relationship between type
 * arguments carries over to Person; class Person[-T] would declare T as contravariant
 */
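// A minimal sketch of covariance; Box, Fruit and Apple are invented names for illustration:
class Fruit
class Apple extends Fruit
class Box[+T](val content: T) // +T makes Box covariant: a Box[Apple] can be used wherever a Box[Fruit] is expected
val appleBox: Box[Apple] = new Box(new Apple)
val fruitBox: Box[Fruit] = appleBox // allowed only because T is declared covariant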
//[_] is an existential type, roughly equivalent to [T]; Dependency[_] stands for a Dependency of some unknown type
//Manifest -> ClassTag (Manifest has evolved into ClassTag)
//abstract class RDD[T: ClassTag](
//@transient private var _sc: SparkContext,
//@transient private var deps: Seq[Dependency[_]]
//T : ClassTag lets the element type be determined at runtime
/**
* {{{
* scala> def mkArray[T : ClassTag](elems: T*) = Array[T](elems: _*)
* mkArray: [T](elems: T*)(implicit evidence$1: scala.reflect.ClassTag[T])Array[T]
*
* scala> mkArray(42, 13)
* res0: Array[Int] = Array(42, 13)
*
* scala> mkArray("Japan","Brazil","Germany")
* res1: Array[String] = Array(Japan, Brazil, Germany)
* }}}
*/
Part 2 homework: read the Spark source code for RDD, HadoopRDD, SparkContext, Master, and Worker, and analyze all the uses of pattern matching and type parameters in them.