基于 Scala 的 Structured Streaming 开发实例如下:
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._
import org.apache.spark.sql.expressions.MutableAggregationBuffer
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.Row
object MyAverage extends UserDefinedAggregateFunction {
// Schema of the arguments this aggregate function accepts: two FloatType
// fields, "rf" followed by "doa".
def inputSchema: StructType =
  new StructType()
    .add("rf", FloatType)
    .add("doa", FloatType)
// Schema of the intermediate aggregation buffer: a FloatType "sum" field
// followed by a LongType "count" field.
def bufferSchema: StructType =
  StructType(
    Seq(
      StructField("sum", FloatType),
      StructField("count", LongType)
    )
  )
// Data type of the value this aggregate function returns: FloatType.
def dataType: DataType = FloatType
// Whether this function always returns the same output on the identical input
def deterministic: Boolean =