package Batch2
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, Dataset, SQLContext, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
 * Demonstrates the Dataset-based approach to analyzing nested JSON:
 * parse a JSON string with an explicit schema, then explode a map
 * column into one row per entry.
 */
object JSONAnalysis1 {

  /**
   * Entry point: parses an in-memory JSON document describing four sensors
   * using an explicit schema, explodes the `source` map column into one row
   * per sensor, and prints selected fields (`id`, `ip`, `des`) to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Run locally on all available cores; app name derived from this object's class name.
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName(this.getClass.getSimpleName)
    // SparkSession is the unified entry point; SQLContext is deprecated since Spark 2.0.
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // Raw JSON payload held as a one-element Dataset[String].
    val dataSet1: Dataset[String] = Seq(
      """
        |{
        |"dc_id": "dc-101",
        |"source": {
        | "sensor-igauge": {
        | "id": 10,
        | "ip": "68.28.91.22",
        | "description": "Sensor attached to the container ceilings",
        | "temp":35,
        | "c02_level": 1475,
        | "geo": {"lat":38.00, "long":97.00}
        | },
        | "sensor-ipad": {
        | "id": 13,
        | "ip": "67.185.72.1",
        | "description": "Sensor ipad attached to carbon cylinders",
        | "temp": 34,
        | "c02_level": 1370,
        | "geo": {"lat":47.41, "long":-122.00}
        | },
        | "sensor-inest": {
        | "id": 8,
        | "ip": "208.109.163.218",
        | "description": "Sensor attached to the factory ceilings",
        | "temp": 40,
        | "c02_level": 1346,
        | "geo": {"lat":33.61, "long":-111.89}
        | },
        | "sensor-istick": {
        | "id": 5,
        | "ip": "204.116.105.67",
        | "description": "Sensor embedded in exhaust pipes in the ceilings",
        | "temp": 40,
        | "c02_level": 1574,
        | "geo": {"lat":35.93, "long":-85.46}
        | }
        | }
        |}
      """.stripMargin).toDS()

    // Explicit schema: `source` is a map from sensor name to a struct of readings.
    val schema = new StructType()
      .add("dc_id", StringType)
      .add("source", MapType(StringType,
        new StructType()
          .add("id", LongType)
          .add("ip", StringType)
          .add("description", StringType)
          .add("temp", LongType)
          .add("c02_level", LongType)
          .add("geo",
            new StructType()
              .add("lat", DoubleType)
              .add("long", DoubleType))
      ))

    // Parse the JSON string into a DataFrame using the schema above
    // (avoids a second pass for schema inference).
    val dataDF: DataFrame = spark.read.schema(schema).json(dataSet1)

    // explode() turns the map column into (key, value) rows — one row per sensor.
    val explodeDF = dataDF.select($"dc_id", explode($"source"))

    // Pull individual struct fields out of the exploded `value` column.
    // ($"..."/.alias(...) replaces the Symbol-literal syntax deprecated in Scala 2.13.)
    explodeDF.select(
      $"value".getItem("id").alias("id"),
      $"value".getItem("ip").alias("ip"),
      $"value".getItem("description").alias("des")
    ).show(false)

    // Release resources (also stops the underlying SparkContext).
    spark.stop()
  }
}