1. Creating a DataFrame via reflection (case class)

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}

case class People(name: String, age: Int)
val conf = new SparkConf().setMaster("local[2]").setAppName("Create DataFrame via reflection")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

val file: RDD[String] = sc.textFile("E://demo.txt")
val peopleRdd: RDD[People] = file.map(_.split(" ")).map(p => People(p(0), p(1).toInt))

import sqlContext.implicits._
val peopleDF: DataFrame = peopleRdd.toDF()
peopleDF.show()
peopleDF.createOrReplaceTempView("people")
val frame: DataFrame = sqlContext.sql("select * from people")
frame.show()
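This example assumes E://demo.txt holds one space-separated name and age per line, for example (sample values only):

zhangsan 20
lisi 31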
2. Creating a DataFrame with StructType
val conf: SparkConf = new SparkConf().setAppName("Create DataFrame with StructType").setMaster("local[2]")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val file: RDD[String] = sc.textFile("E://demo.txt")
val rowRDD = file.map(_.split(" ")).map(x => Row(x(0), x(1).toInt))

val schema = StructType(
  // Equivalent: List(StructField("name", StringType, true), StructField("age", IntegerType, true))
  StructField("name", StringType, true) ::
  StructField("age", IntegerType, true) :: Nil
)
val peopleDF: DataFrame = sqlContext.createDataFrame(rowRDD, schema)
peopleDF.show()
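On Spark 2.x and later, the same StructType approach is usually written against SparkSession rather than SQLContext. The following is a minimal sketch under that assumption (the app name and master setting are placeholders):

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Build a SparkSession instead of SparkContext + SQLContext
val spark = SparkSession.builder()
  .master("local[2]")
  .appName("StructType with SparkSession")
  .getOrCreate()

// Parse the same space-separated file into Row objects
val rowRDD = spark.sparkContext.textFile("E://demo.txt")
  .map(_.split(" "))
  .map(x => Row(x(0), x(1).toInt))

val schema = StructType(
  StructField("name", StringType, true) ::
  StructField("age", IntegerType, true) :: Nil
)

val peopleDF = spark.createDataFrame(rowRDD, schema)
peopleDF.show()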
3. Creating a DataFrame by loading JSON files, CSV files, JDBC connections, and other sources
val conf: SparkConf = new SparkConf().setAppName("Create DataFrame from a JSON file").setMaster("local[2]")
val sc = new SparkContext(conf)
sc.setLogLevel("WARN")
val sqlContext = new SQLContext(sc)

// Load option 1
val frame: DataFrame = sqlContext.read.json("E://people.json")
// Load option 2
// val frame: DataFrame = sqlContext.read.format("json").load("E://people.json")
// val frame: DataFrame = sqlContext.read.parquet("E://people.parquet")
frame.createOrReplaceTempView("people")
sqlContext.sql("select * from people").show()

// Save options
frame.write.json("E://test.json")
frame.write.csv("E://test.csv")
frame.write.parquet("E://test.parquet")
frame.write.format("json").save("E://test1.json")
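Note that each of these write calls fails if the target path already exists; a save mode can be set first. A small sketch (overwrite and append are chosen here only for illustration):

import org.apache.spark.sql.SaveMode

// Overwrite the JSON output directory if it already exists
frame.write.mode(SaveMode.Overwrite).json("E://test.json")
// Append to an existing Parquet dataset
frame.write.mode("append").parquet("E://test.parquet")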
Loading data via JDBC
val conf = new SparkConf().setMaster("local[2]").setAppName("Load a JDBC data source")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

import java.util.Properties

val url = "jdbc:mysql://ip:3306/mk"
val table = "user"
val properties = new Properties()
properties.setProperty("user", "youruser")
properties.setProperty("password", "yourpassword")
properties.setProperty("driver", "com.mysql.jdbc.Driver")

// Option 1
val df = sqlContext.read.jdbc(url, table, properties)
// Option 2 (SparkSession API)
// val jdbcDF = spark.read.format("jdbc")
//   .option("url", "jdbc:mysql://localhost:3306/***")  // *** is the database name
//   .option("driver", "com.mysql.jdbc.Driver")
//   .option("dbtable", "****")                          // **** is the table name
//   .option("user", "*****")
//   .option("password", "*****")
//   .load()
df.createOrReplaceTempView("dbs")
sqlContext.sql("select count(1) from dbs").show()
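The reverse direction works the same way: a DataFrame can be written back over JDBC with the same url and connection properties. A minimal sketch, assuming a hypothetical target table named user_copy:

import org.apache.spark.sql.SaveMode

// Append the DataFrame rows into the (hypothetical) user_copy table
df.write.mode(SaveMode.Append).jdbc(url, "user_copy", properties)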