指定列名添加Schema (Add a schema by specifying column names)
package SparkSql
import org.apache.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import java.util.Properties
import org.apache.avro.ipc.specific.Person
import org.apache.spark.sql.{
DataFrame, SaveMode, SparkSession}

/** Builds a DataFrame from a whitespace-delimited text file, assigning
  * explicit column names via `toDF("id", "name", "age")` — which is the
  * stated purpose of this example (specify column names for the schema).
  */
object CreateDFDS {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("Spark Sql basic example")
      .getOrCreate()
    val sc = spark.sparkContext
    // Each line of input/tt.txt is expected to be "<id> <name> <age>",
    // separated by single spaces — TODO confirm input format.
    val data: RDD[String] = sc.textFile("input/tt.txt")
    val lineRDD: RDD[Array[String]] = data.map(_.split(" "))
    // NOTE(review): line(0).toInt / line(2).toInt will throw
    // NumberFormatException on malformed rows — confirm the input is clean.
    val rowRDD = lineRDD.map(line => (line(0).toInt, line(1), line(2).toInt))
    import spark.implicits._
    // Fix: pass explicit column names instead of the tuple defaults _1/_2/_3.
    val personDF: DataFrame = rowRDD.toDF("id", "name", "age")
    personDF.show(10)
    personDF.printSchema()
    // spark.stop() also stops the underlying SparkContext, so a separate
    // sc.stop() beforehand is redundant.
    spark.stop()
  }
}
StructType指定Schema-了解 (Specify a schema with StructType — for awareness only)
package day0413
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{
IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{
DataFrame, Row, SparkSession}
object CreateDFDS2 {
def main(args: Array[String]): Unit = {
val spark: SparkSession = SparkSession.builder().master("local[*]").appName("SparkSQL").getOrCreate</