一、说明
1、Scala 的正则表达式继承了 Java 的语法规则;
2、Scala 通过 scala.util.matching 包中的 Regex 类来支持正则表达式;
二、demo演示
1、代码
package com.hn.test
import scala.util.matching.Regex
/**
 * Walk-through of the basic `scala.util.matching.Regex` operations:
 * finding all matches, finding the first match, and replacing matches.
 */
object TestRegex {
  def main(args: Array[String]): Unit = {
    // Calling `.r` on a String builds a Regex; `new Regex("scala")` is the explicit equivalent.
    val pattern: Regex = "scala".r
    val text = "scala is a good language,scala is best"

    // Every matched substring, joined with commas.
    println(pattern.findAllIn(text).mkString(","))

    // The same matches as Regex.Match objects; each one prints as its matched text.
    println(pattern.findAllMatchIn(text).mkString(","))

    // Only the first match, wrapped in an Option.
    println(pattern.findFirstIn(text))

    // Replace every occurrence, then only the first occurrence.
    println(pattern.replaceAllIn(text, "java"))
    println(pattern.replaceFirstIn(text, "java"))
  }
}
2、输出
scala,scala
scala,scala
Some(scala)
java is a good language,java is best
java is a good language,scala is best
三、小示例
1、代码
package com.hn.test
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Small Spark example: keeps the strings that contain "2" or "3" and counts
 * how many times each of them occurs.
 *
 * An explicit `main` method is used instead of `extends App`, because Spark's
 * documentation warns that subclasses of `scala.App` may not work correctly.
 */
object Test {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("testRegular").setMaster("local[1]")
    val sc: SparkContext = new SparkContext(conf)
    try {
      sc.setLogLevel("WARN")
      val value: RDD[String] = sc.parallelize(Array("1", "2", "3", "4", "5", "2", "2", "3"))

      // Compiled once here instead of once per element inside the filter closure.
      // Inside a character class '|' is a literal character, so "[2|3]" would also
      // accept strings containing '|'; "[23]" matches only '2' or '3'.
      val twoOrThree = "[23]".r

      // Count the occurrences of the elements that contain 2 or 3.
      val v_rdd: RDD[(String, Int)] = value
        .filter(s => twoOrThree.findFirstIn(s).isDefined)
        .map((_, 1))
        .reduceByKey(_ + _)

      // foreachPartition returns Unit, so the outer println also emits a trailing "()" line;
      // it is kept so the program output stays the same as before.
      println(v_rdd.foreachPartition(_.foreach { case (key, count) =>
        println(s"${key}_$count")
      }))
    } finally {
      // Release the local Spark resources even if the job fails.
      sc.stop()
    }
  }
}
2、输出
2_3
3_2
()