package com.zhengkw.spark.sql
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
/**
* Author zhengkw
* Date 2020/5/14 10:31
*/
object RDD2DF_2 {

  /**
   * Demonstrates converting an RDD to a DataFrame by pairing an
   * `RDD[Row]` with an explicitly defined [[StructType]] schema.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("RDD2DF_2")
      .getOrCreate()

    // 1. Build an RDD[Row] from local (age, name) tuples.
    val list = Array((10, "lisi"), (20, "zs"), (15, "ww"))
    // `val`, not `var`: the RDD reference is never reassigned.
    val rdd = spark.sparkContext.parallelize(list).map {
      case (age, name) =>
        Row(age, name)
    }

    // 2. Describe each Row position with a column name and value type (the schema).
    val schema = StructType(Array(StructField("age", IntegerType), StructField("name", StringType)))

    // Create the DataFrame by combining the RDD[Row] with the schema.
    val df = spark.createDataFrame(rdd, schema)
    // show() has a side effect (prints to stdout), so call it with parentheses.
    df.show()

    spark.close()
  }
}
// RDD-to-DataFrame conversion, method 2: define the schema explicitly.
// (Original post metadata: latest recommended article published 2021-11-25 15:54:45.)