一、利用ArrayBuffer写UDF处理数据生成Array数据类型
1、核心代码:
//导入包
import scala.collection.mutable.ArrayBuffer
// Define a mutable ArrayBuffer (it is mutated in place; the `var` binding is never reassigned in this snippet)
var result = ArrayBuffer[Int]()
// Append an element
result += userType
// Convert the buffer to an Array
result.toArray
2、实践代码示例:
//in scala
package com.toby.gao.scala
import java.text.SimpleDateFormat
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.functions.{collect_set, collect_list}
import scala.collection.mutable.ArrayBuffer
/**
* @param targetCode 目标code
* @param startDate 统计开始日期
* @param endDate 统计结束日期
*/
class GetNewConsumers(targetCode:Int,
startDate:String,
endDate:String)(implicit spark: SparkSession) extends Serializable {
/**
 * Encodes a user as one 0/1 flag per window type.
 *
 * For each window type in `Seq(-1, -365, -180, 0)` (in that order) a logical date range is
 * obtained from `getLogicalDate(startDate, endDate, windowType)`. The flag for that window is
 * 1 when at least one entry of `orderDateSeq`, converted through `dateFormatTransfer`, lies
 * inside the range (both bounds inclusive), and 0 otherwise.
 *
 * NOTE(review): the original comment listed types 0, 1, 2, 3, 4, 5, but this body only ever
 * emits 0 or 1 — confirm which is intended.
 *
 * @param brandCode     brand code; not read by the body, kept so the UDF signature is unchanged
 * @param thirdCateCode third-level category code; not read by the body
 * @param firstCateCode first-level category code; not read by the body
 * @param orderDateSeq  order dates encoded as Int; a null sequence is treated as "no orders"
 * @return an Array with one flag per window type, in the order listed above
 */
def tagUserType(brandCode:Int,thirdCateCode:Int,firstCateCode:Int,orderDateSeq:Seq[Int]):Array[Int] = {
  val defaultType = 0
  val userType = 1
  // Guard against a null sequence (e.g. a NULL array column) so the UDF yields all-default
  // flags instead of throwing a NullPointerException.
  val orderDates = Option(orderDateSeq).getOrElse(Seq.empty[Int])
  // The buffer is mutated in place, so the binding itself can be a val.
  val result = ArrayBuffer[Int]()
  for (windowType <- Seq(-1, -365, -180, 0)) {
    val (logicalStartDate, logicalEndDate) = getLogicalDate(startDate, endDate, windowType)
    // `exists` stops at the first matching order date, like the original flag-guarded loop.
    val hasOrderInWindow = orderDates.exists { orderDate =>
      // Order dates are stored as Int, so convert to String before parsing.
      val dealDate = dateFormatTransfer(orderDate.toString)
      dealDate >= logicalStartDate && dealDate <= logicalEndDate
    }
    result += (if (hasOrderInWindow) userType else defaultType)
  }
  result.toArray
}
// Register tagUserType under the SQL name "tagUserType" so it can be called from Spark SQL text
spark.udf.register(
  "tagUserType",
  (brandCode: Int, thirdCateCode: Int, firstCateCode: Int, orderDateSeq: Seq[Int]) =>
    tagUserType(brandCode, thirdCateCode, firstCateCode, orderDateSeq)
)
// Use the registered UDF from Spark SQL
/**
 * Builds a query that applies the `tagUserType` UDF to every row of `table_name`, prints the
 * SQL text, runs it, caches the resulting DataFrame and shows up to 50 rows without truncation.
 * Nothing is returned; the result is only displayed.
 */
def getUserType: Unit = {
  val sqlText: String =
    s"""
       |SELECT
       |user_id,
       |tagUserType(brand_code,third_cate_code,first_cate_code,order_dt_seq) as user_type_list
       |from $table_name
       |""".stripMargin
  println(sqlText)
  val taggedUsers: DataFrame = spark.sql(sqlText)
  taggedUsers.cache()
  taggedUsers.show(50, truncate = false)
}
二、利用ListBuffer写UDF处理数据生成List数据类型
1、核心代码:
// Define a mutable ListBuffer
val result = scala.collection.mutable.ListBuffer[Int]()
// Append an element
result.append(userType)
// Convert the buffer to a List
result.toList
2、实践代码示例: 同上一
三、利用可变Map写UDF处理数据生成Map数据类型
1、核心代码:
// Define a mutable Map
// NOTE(review): the value type is String, so `userType` must be a String in this snippet
// (the practice example declares `userType` as an Int) — confirm the intended value type
val map = scala.collection.mutable.Map[Int,String]()
// Add a key -> value entry
map += (windowType -> userType)
// Convert to an immutable Map
map.toMap
2、实践代码示例: 同上一