在 pom.xml 中添加以下 fastjson2 依赖:
<dependency> <groupId>com.alibaba.fastjson2</groupId> <artifactId>fastjson2</artifactId> <version>2.0.20</version> </dependency>
由于后端开发要求以 JSON 格式交付数据,这里在聚合时将结果转换为 JSON 字符串。
import com.alibaba.fastjson2.{JSON, JSONObject}
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row}
/**
 * UDAF that pivots grouped "key|value" string rows into a single JSON string,
 * e.g. the rows "d1|10" and "d2|20" aggregate to {"d1":"10","d2":"20"}.
 *
 * Buffer layout: `update` accumulates raw inputs as a comma-joined string
 * ("k1|v1,k2|v2"); `merge` folds those partial strings into a JSON object,
 * so by `evaluate` the buffer holds the final JSON document.
 */
object AverageUserDefinedAggregateFunction extends UserDefinedAggregateFunction {
  // Input: one string column, each value formatted as "key|value".
  override def inputSchema: StructType = StructType(StructField("input", StringType) :: Nil)

  // Buffer: a comma-joined accumulation of the raw input strings
  // (becomes a JSON string once merge has run).
  override def bufferSchema: StructType = StructType(StructField("result", StringType) :: Nil)

  // Output: the final JSON document as a string.
  override def dataType: DataType = StringType

  // Deterministic: the same input rows always produce the same output.
  override def deterministic: Boolean = true

  // Start each group with an empty accumulation.
  override def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(0) = ""
  }

  // Append the incoming "key|value" string to the buffer, comma-separated.
  // Null inputs are skipped.
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) {
      val acc = buffer.getString(0)
      buffer(0) =
        if (acc == null || acc.isEmpty) input.getString(0)
        else acc + "," + input.getString(0)
    }
  }

  // Fold the right partial buffer ("k1|v1,k2|v2,...") into the JSON object
  // accumulated on the left. Spark's aggregation routes partial buffers
  // through merge before evaluate, so the final buffer ends up as JSON.
  override def merge(bufferLeft: MutableAggregationBuffer, bufferRight: Row): Unit = {
    val data = bufferRight.getString(0)
    // An empty partial (e.g. from an empty partition) contributes nothing.
    // The previous implementation crashed here: "".split("\\|") yields
    // Array("") and indexing element 1 threw ArrayIndexOutOfBoundsException.
    if (data != null && data.nonEmpty) {
      val current = bufferLeft.getString(0)
      // Parse the JSON accumulated so far exactly once, not once per entry.
      val result: JSONObject =
        if (current == null || current.isEmpty) new JSONObject
        else JSON.parseObject(current)
      // split(",") already covers the single-entry case, so no separate
      // contains(",") branch is needed.
      for (entry <- data.split(",")) {
        val kv = entry.split("\\|")
        // Skip malformed entries without a "|" instead of throwing.
        if (kv.length >= 2) result.put(kv(0), kv(1))
      }
      bufferLeft(0) = result.toString
    }
  }

  // The buffer already holds the JSON string after the merge phase.
  override def evaluate(buffer: Row): Any = buffer.getString(0)
}
// Register the UDAF so SQL can pivot grouped rows into one JSON column.
// NOTE(review): registering it as "concat_ws" shadows Spark's built-in
// concat_ws function for this session — any other query using the built-in
// will silently get this UDAF instead. Consider a distinct name such as
// "rows_to_json" (update the query below to match).
spark.udf.register("concat_ws", AverageUserDefinedAggregateFunction)
// Example query: for each reg_time group, fold the order_time values
// ("key|value" strings) into a single JSON string column dayN.
select reg_time,concat_ws(order_time) as dayN from test group by reg_time