Spark---Datasource(JSON)---Scala

package com.spark.sparksql.datasource.scala

import org.apache.spark.sql.types.{StructType, IntegerType, StringType, StructField}
import org.apache.spark.sql.{SaveMode, Row, SQLContext}
import org.apache.spark.{SparkContext, SparkConf}

/**
  * Created by root on 2017/8/9.
  *
  * Spark SQL JSON data source example: loads student scores from a JSON file,
  * builds student infos from in-memory JSON strings, joins the two by name for
  * students whose score is at least 80, and saves the result as JSON.
  */
object JSONDataSource {

  /**
    * Runs the example job.
    *
    * Reads `students.json` (queried for `name` and `score` columns), joins the
    * rows with `score >= 80` against the hard-coded student infos, and writes
    * `(name, score, age)` records to `goodStudentJson`, overwriting any
    * previous output.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("JSONDataSource").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)

      // Scores come from a JSON file; keep only the students scoring >= 80.
      // The explicit projection pins the column order to (name, score), so the
      // positional access below does not depend on the inferred schema order.
      val studentScoresDF = sqlContext.read.json("students.json")
      studentScoresDF.registerTempTable("student_scores")
      val goodStudentScoresDF =
        sqlContext.sql("select name, score from student_scores where score >= 80")

      // Student infos are built from in-memory JSON strings.
      val studentInfoJSONs = List(
        "{\"name\":\"Liangyongqi\",\"age\":19}",
        "{\"name\":\"Xuruyun\",\"age\":17}",
        "{\"name\":\"Yasaka\",\"age\":18}")

      val studentInfosRDD = sc.parallelize(studentInfoJSONs)
      val studentInfosDF = sqlContext.read.json(studentInfosRDD)
      studentInfosDF.registerTempTable("student_infos")
      // Explicit projection again fixes the column order to (name, age).
      val studentInfosByNameDF = sqlContext.sql("select name, age from student_infos")

      // The inner join on name already restricts the infos to good students, so
      // no hand-built "name in (...)" SQL string is needed. That string was
      // invalid for an empty list and unsafe for names containing quotes.
      // `.rdd` makes the row-level map explicit (plain RDD operations).
      val scoreByName = goodStudentScoresDF.rdd.map(row => (row(0), row(1)))
      val ageByName = studentInfosByNameDF.rdd.map(row => (row(0), row(1)))
      val goodStudentsRDD = scoreByName.join(ageByName)

      // JSON numbers are not inferred as Int, hence the toString.toInt round trip.
      val goodStudentsRDDRow = goodStudentsRDD.map { case (name, (score, age)) =>
        Row(name.toString, score.toString.toInt, age.toString.toInt)
      }

      val structType = StructType(Array(
        StructField("name", StringType, nullable = true),
        StructField("score", IntegerType, nullable = true),
        StructField("age", IntegerType, nullable = true)))

      val goodStudentDF = sqlContext.createDataFrame(goodStudentsRDDRow, structType)
      goodStudentDF.write.format("json").mode(SaveMode.Overwrite).save("goodStudentJson")
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值