UDF(用户自定义函数)
/**
 * Tells whether an operator name refers to China Unicom.
 *
 * The whole string must match one of the patterns `.*中国联通.*`,
 * `.*CHN-UNICOM.*` or `.*China Unicom.*`. Matching is case-sensitive, and
 * because `.` does not match line terminators, a name containing a line
 * break is rejected.
 *
 * @param operator operator name; may be null
 * @return true when the name matches one of the patterns, false otherwise
 *         (including when `operator` is null)
 */
def filterTdWithOp(operator: String): Boolean = {
  // Alternation of the three accepted spellings, joined into one regex.
  val unicomPattern =
    Seq(".*中国联通.*", ".*CHN-UNICOM.*", ".*China Unicom.*").mkString("|")
  // Option(null) is None, so a null operator yields false without an explicit check.
  Option(operator).exists(_.matches(unicomPattern))
}
// Expose the table and the UDF to SQL before running the query that uses both.
td.registerTempTable("td")
sqlContext.udf.register("filterTdWithOp", filterTdWithOp _)
// Add the UDF result as column fiOp, then keep only the rows where it is true.
val tdOp = sqlContext
  .sql("select *,filterTdWithOp(operator) as fiOp from td")
  .filter("fiOp = true")
UDAF(用户自定义聚合函数)
建议直接参考 Spark 官方文档中关于 Aggregator 的说明。
/**
 * Aggregator that keeps, out of all `Input` records it sees, the one whose
 * `pointNum` is largest. Input, buffer and output are all of type `Input`.
 */
object MaxPoint extends Aggregator[Input, Input, Input] {

  // Picks `left` only when its pointNum is strictly greater; ties go to `right`.
  private def larger(left: Input, right: Input): Input =
    if (left.pointNum > right.pointNum) left else right

  // Initial buffer value used before any record has been folded in.
  override def zero: Input = Input(0, 0, 0, "")

  // Fold one incoming record `a` into the running buffer `b`.
  override def reduce(b: Input, a: Input): Input = larger(b, a)

  // Combine two partial buffers using the same rule as `reduce`.
  override def merge(b: Input, a: Input): Input = larger(b, a)

  // The final buffer is returned unchanged as the result.
  override def finish(reduction: Input): Input = reduction
}
Spark 2.0 之后引入了两个新方法