object RDDImplict {

  /** Enriches any `RDD[T]` with a boolean-split helper. */
  implicit class RDDOps[T](rdd: RDD[T]) {

    /**
     * Splits this RDD into `(elements satisfying f, elements failing f)`.
     *
     * Both halves are independent lazy filters over the same parent, so
     * the predicate runs twice per element unless the parent RDD is
     * cached. Spark offers no `filterNot`, hence the negated filter for
     * the second half.
     *
     * @param f predicate deciding which half each element belongs to
     * @return pair of RDDs: (passing elements, failing elements)
     */
    def partitionBy(f: T => Boolean): (RDD[T], RDD[T]) =
      (rdd.filter(f), rdd.filter(elem => !f(elem)))
  }
}
import RDDImplict._

// Cache before splitting: each half of partitionBy re-scans the parent
// with its own filter, so caching avoids materialising the range twice.
val numbers = sc.parallelize(1 to 100).cache()
val (matches, matchesNot) = numbers.partitionBy(n => n % 2 == 0)
object BadImplicit {

  /** Extension-method syntax adding canned literal columns to a DataFrame. */
  implicit class DataFrameTransforms(df: DataFrame) {

    /** Returns a copy of `df` with a constant `greeting` column ("hello world"). */
    def withGreeting(): DataFrame =
      df.withColumn("greeting", lit("hello world"))

    /** Returns a copy of `df` with a constant `farewell` column ("goodbye"). */
    def withFarewell(): DataFrame =
      df.withColumn("farewell", lit("goodbye"))
  }
}
import BadImplicit._

// Tiny two-row, single-column frame to exercise the implicit syntax.
val words = Seq("funny", "person")
val df = words.toDF("something")

// Column order follows call order: "greeting" is appended before "farewell".
val hiDf = df.withGreeting().withFarewell()