root
|-- trackNo: string (nullable = true)
|-- shipClassType: string (nullable = true)
|-- labelId: long (nullable = true)
|-- Latest_status_time: timestamp (nullable = true)
|-- Billing_start_time: timestamp (nullable = true)
|-- days: integer (nullable = true)
|-- time: string (nullable = true)
|-- Logistics_status: string (nullable = true)
// Collapse the rows of rdd24 into one row per distinct combination of the
// grouping columns, carrying the remaining fields as an array of
// (time, Logistics_status) structs in the column "commm".
val rdd88 = {
  // Columns that identify one output row; they are kept as-is in the result.
  val groupingKeys = Seq(
    "trackNo", "shipClassType", "labelId",
    "Latest_status_time", "Billing_start_time", "days")

  // Pack the fields that should become the array's element into a single struct column.
  val withEvent = rdd24.withColumn("com", struct("time", "Logistics_status"))

  withEvent
    .groupBy(groupingKeys.head, groupingKeys.tail: _*)
    // Gather each group's structs into an array and give that array column its final name.
    // NOTE(review): collect_list does not guarantee element order after a shuffle —
    // confirm whether downstream code relies on the order of the entries.
    .agg(collect_list("com").as("commm"))
}
root
|-- trackNo: string (nullable = true)
|-- shipClassType: string (nullable = true)
|-- labelId: long (nullable = true)
|-- Latest_status_time: timestamp (nullable = true)
|-- Billing_start_time: timestamp (nullable = true)
|-- days: integer (nullable = true)
|-- commm: array (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |-- time: string (nullable = true)
|    |    |-- Logistics_status: string (nullable = true)
Spark 中为 DataFrame 的数据结构增加 array 字段(直接上干货)
最新推荐文章于 2023-06-10 11:30:42 发布