Spark SQL 加载 JSON 文件

package spark.demo.sql

import org.apache.spark.sql.{ SparkSession}
import org.apache.spark.{ SparkConf, SparkContext }

/**
 * Demo: loading JSON data with Spark SQL.
 *
 * Reads a JSON file into a DataFrame, registers it as a temporary view,
 * queries it with SQL, and then builds a second DataFrame from an
 * in-memory `Dataset[String]` holding one JSON object per element.
 */
object SqlJsonDemo {

  /** Sample input file, resolved relative to the working directory. */
  private val PeopleJsonPath = "resources/people.json"

  /**
   * Runs the demo on a local two-thread Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // A single builder creates the session and its underlying context;
    // no separate SparkContext needs to be constructed first.
    val sparkSession = SparkSession.builder()
      .appName("JSONDataSource")
      .master("local[2]")
      .getOrCreate()

    try {
      // Needed for the implicit Encoder[String] used by createDataset below.
      import sparkSession.implicits._

      // --- Part 1: read the file, register a view, run a simple query ---
      val dataset = sparkSession.read.json(PeopleJsonPath)
      dataset.printSchema()

      // createOrReplaceTempView does not fail if "people" is already registered.
      dataset.createOrReplaceTempView("people")
      val namesInAgeRange =
        sparkSession.sql("SELECT name FROM people WHERE age >= 13 AND age <= 99")

      // Bring the rows to the driver before printing: println inside an RDD
      // closure runs on the executors and is only visible in local mode.
      namesInAgeRange.collect().foreach(row => println(s"name:${row.getString(0)}"))

      // --- Part 2: same file, query with date/timestamp conversions ---
      // The path can be either a single text file or a directory storing text files.
      val peopleDF = sparkSession.read.json(PeopleJsonPath)

      // The schema is inferred from the file contents.
      peopleDF.printSchema()

      // Replaces the "people" view registered in part 1.
      peopleDF.createOrReplaceTempView("people")

      // The result has four columns: name, borthday, aaa and the to_date(...) column.
      val teenagerNamesDF = sparkSession.sql("SELECT name,borthday,to_timestamp(borthday,'yyyy-MM-dd') aaa,to_date(borthday) FROM people WHERE age BETWEEN 13 AND 19")
      teenagerNamesDF.show()

      // --- Part 3: build a DataFrame from JSON strings held in memory ---
      // Each element of the Dataset[String] is one JSON object.
      val otherPeopleDataset = sparkSession.createDataset(
        """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil)
      val otherPeople = sparkSession.read.json(otherPeopleDataset)
      otherPeople.show()

      otherPeople.createOrReplaceTempView("peopleT")
      otherPeople.printSchema()

      val result = sparkSession.sql("SELECT * FROM peopleT ")
      result.show()
    } finally {
      // Always release the session, even when a step above throws.
      sparkSession.stop()
    }
  }

}

 

更多代码请参考:https://github.com/hsn999/spark-demo

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值