package com.lenovo.ftp

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import scala.collection.mutable.ListBuffer

// Runs as a Spark application: splits "/"-delimited strings into Rows and
// pairs them with a StructType schema to build a DataFrame.
object SparkMapArrayToDF {
  def main(args: Array[String]): Unit = {
    val ss = SparkSession
      .builder()
      .appName("SparkMapArrayToDF")
      .master("local")
      //.config("spark.sql.warehouse.dir", "file:///D://lenovo_pj//cpp//cpp")
      //.enableHiveSupport()
      .getOrCreate()

    // Raw data: each element is one "/"-delimited record.
    val arr = Array("aa/bb/cc/dd/ee", "mm/dd/nn/ff/hh")
    // Column names, also "/"-delimited; their order must match the record fields.
    val str = "record_date/client/langu/district/description"
    val columnArr = str.split("/")

    // Build the schema: one nullable StringType field per column name.
    val structFieldList = new ListBuffer[StructField]()
    for (i <- 0 until columnArr.length) {
      structFieldList += StructField(columnArr(i), StringType, nullable = true)
    }
    val schema = StructType(structFieldList)

    // Split each record and wrap the resulting array in a Row (Spark's generic tuple).
    val values = ss.sparkContext
      .parallelize(arr)
      .map(line => {
        val fields = line.split("/")
        Row.fromSeq(fields.toSeq)
      })

    // Combine the RDD[Row] with the schema into a DataFrame and register it as a temp view.
    ss.createDataFrame(values, schema)
      .createOrReplaceTempView("test")
  }
}
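The listing above only registers the temporary view and never reads it back. As a quick check, a query against the view could be appended inside the same main method; a minimal sketch, assuming it runs while the SparkSession is still open:

// Query the view registered above and print all five string columns without truncation.
ss.sql("SELECT record_date, client, langu, district, description FROM test").show(false)
// Or inspect the resulting structure directly:
ss.table("test").printSchema()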
Spark: converting an array into a tuple (Row) inside the map function and building a DataFrame from the schema information
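As a closing note on the schema step: the ListBuffer loop in the listing can also be written without mutable state by mapping directly over the column names. A minimal equivalent sketch, assuming the same columnArr and imports as above:

// Loop-free schema construction: one nullable StringType field per column name.
val schema = StructType(columnArr.map(name => StructField(name, StringType, nullable = true)))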