新方式分析JSON(三范式)---

 

package Batch2

import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._
/**
  * 新方式分析json
  */
/**
  * Demonstrates parsing nested JSON with an explicit schema, then flattening
  * a map-typed column: each entry of the `source` map (sensor name -> sensor
  * struct) becomes its own row via `explode`, and selected struct fields are
  * printed.
  */
object JSONAnalysis1 {
  def main(args: Array[String]): Unit = {
    // Spark configuration. stripSuffix("$") removes the trailing '$' that
    // getSimpleName produces for Scala objects (would otherwise show as
    // "JSONAnalysis1$" in the Spark UI).
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName(this.getClass.getSimpleName.stripSuffix("$"))

    // SparkContext + SQLContext.
    // NOTE(review): SQLContext has been deprecated since Spark 2.0 in favor of
    // SparkSession — consider migrating when upgrading; kept here unchanged.
    val sc = new SparkContext(conf)
    val sql = new SQLContext(sc)
    import sql.implicits._

    // Sample payload: a single data center holding a map of
    // sensor-name -> sensor record (each with a nested `geo` struct).
    val dataSet1: Dataset[String] = Seq(
      """
        |{
        |"dc_id": "dc-101",
        |"source": {
        |    "sensor-igauge": {
        |      "id": 10,
        |      "ip": "68.28.91.22",
        |      "description": "Sensor attached to the container ceilings",
        |      "temp":35,
        |      "c02_level": 1475,
        |      "geo": {"lat":38.00, "long":97.00}
        |    },
        |    "sensor-ipad": {
        |      "id": 13,
        |      "ip": "67.185.72.1",
        |      "description": "Sensor ipad attached to carbon cylinders",
        |      "temp": 34,
        |      "c02_level": 1370,
        |      "geo": {"lat":47.41, "long":-122.00}
        |    },
        |    "sensor-inest": {
        |      "id": 8,
        |      "ip": "208.109.163.218",
        |      "description": "Sensor attached to the factory ceilings",
        |      "temp": 40,
        |      "c02_level": 1346,
        |      "geo": {"lat":33.61, "long":-111.89}
        |    },
        |    "sensor-istick": {
        |      "id": 5,
        |      "ip": "204.116.105.67",
        |      "description": "Sensor embedded in exhaust pipes in the ceilings",
        |      "temp": 40,
        |      "c02_level": 1574,
        |      "geo": {"lat":35.93, "long":-85.46}
        |    }
        |  }
        |}
      """.stripMargin).toDS()

    // Explicit schema: `dc_id` string plus `source`, a map from sensor name to
    // a struct of sensor fields; `geo` is itself a nested lat/long struct.
    // Supplying the schema avoids a costly inference pass over the JSON.
    val schema = new StructType()
      .add("dc_id", StringType)
      .add("source", MapType(StringType,
        new StructType()
          .add("id", LongType)
          .add("ip", StringType)
          .add("description", StringType)
          .add("temp", LongType)
          .add("c02_level", LongType)
          .add("geo",
            new StructType()
              .add("lat", DoubleType)
              .add("long", DoubleType))
      ))

    // Parse the JSON strings into a DataFrame using the schema above.
    val dataDF: DataFrame = sql.read.schema(schema).json(dataSet1)

    // explode() on a MapType column yields one row per map entry, producing
    // `key` (sensor name) and `value` (sensor struct) columns.
    val explodeDF = dataDF.select($"dc_id", explode($"source"))

    // Extract individual struct fields from the exploded `value` column.
    // Symbol literals ('value, 'des) were replaced with $-interpolation /
    // string aliases: symbol literal syntax is deprecated in Scala 2.13 and
    // removed in Scala 3; output column names are unchanged.
    explodeDF.select(
      $"value".getItem("id").alias("id"),
      $"value".getItem("ip").alias("ip"),
      $"value".getItem("description").alias("des")).show(false)

    // Release cluster resources.
    sc.stop()
  }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值