Spark消费kafka数据 json中包含数组的数据类型

4 篇文章 0 订阅
3 篇文章 0 订阅

JSON数据格式

{
    "header": {
        "traceId": "06ad872d5d5bfa0d", 
        "appName": "zeus-merchant", 
        "deviceType": null, 
        "version": null, 
        "userAgent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36", 
        "ip": "119.162.98.224", 
        "sysTime": "2021-06-11 13:36:27"
    }, 
    "body": [
        {
            "txnType": 1, 
            "txnCode": "10100720012021061111522994861596", 
            "txnSubCode": "18399", 
            "refCode": "10200720012021061113362702629760", 
            "txnValue": null, 
            "event": 1, 
            "subEvent": null, 
            "userId": "307426", 
            "extend": null
        }
    ]
}

代码:

import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession, functions}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

import scala.collection.mutable.ArrayBuffer

/**
 * Spark Streaming job that consumes JSON messages from the ZEUS_MIS Kafka topic.
 *
 * Each message is an envelope of the form {"header": {...}, "body": [{...}, ...]}.
 * The job explodes the "body" array, replicating the shared "header" fields onto
 * every body element, and prints the resulting flat DataFrame per micro-batch.
 */
object cainiao {

  /**
   * Reads `key` from `obj` as a string, substituting the literal "null"
   * for missing or JSON-null values (the downstream schema is all-string
   * and the original code used the sentinel string "null").
   *
   * Defined on the object so Spark closures reach it statically via MODULE$
   * instead of serializing an enclosing instance.
   */
  private def strOrNull(obj: JSONObject, key: String): String =
    Option(obj.getString(key)).getOrElse("null")

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("zeus_mis streaming")
    conf.setMaster("local")
    conf.set("spark.testing.memory", "2147480000")
    conf.set("spark.executor.instances", "4")
    conf.set("spark.executor.memory", "12g")
    conf.set("spark.driver.allowMultipleContexts", "true")
    conf.set("spark.cores.max", "16")
    conf.set("spark.dynamicAllocation.enabled", "false")
    conf.set("hive.metastore.uris", "thrift://172.16.9.241:9083")
//    conf.set("hive.metastore.uris", "thrift://192.168.249.212:9083")
    conf.set("spark.sql.warehouse.dir", "hdfs://172.16.9.241:8020/user/hive/warehouse/ods.db")
//    conf.set("spark.sql.warehouse.dir", "hdfs://192.168.249.212:8020/user/hive/warehouse/ods.db")

    // SparkSession with Hive support; the StreamingContext below shares its SparkContext.
    val spark = SparkSession
      .builder()
      .config(conf)
      .enableHiveSupport()
      .getOrCreate()

    // 5-second micro-batches.
    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))
    ssc.sparkContext.setLogLevel("ERROR")

//    val bootstrapServers = "172.16.9.240:9092,172.16.9.236:9092,172.16.9.237:9092"
    val bootstrapServers = "192.168.249.177:9092,192.168.249.128:9092,192.168.249.144:9092"
    val groupId = "test-20"
    val topicName = "ZEUS_MIS"
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bootstrapServers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest", // "latest" starts at the newest offset; use "earliest" to replay
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val kafkaTopicDS = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set(topicName), kafkaParams)
    )

    // Raw JSON strings from Kafka record values.
    val lineDStream = kafkaTopicDS.map(_.value())
    lineDStream.print()
    import spark.implicits._

    // For each micro-batch: parse the envelope, explode "body", and emit one
    // 16-column row per body element with the header fields repeated.
    lineDStream.foreachRDD { rdd =>
      rdd.flatMap { raw =>
        val json = JSON.parseObject(raw)
        val header = json.getJSONObject("header")
        val traceId    = strOrNull(header, "traceId")
        val appName    = strOrNull(header, "appName")
        val deviceType = strOrNull(header, "deviceType")
        val version    = strOrNull(header, "version")
        val ip         = strOrNull(header, "ip")
        val sysTime    = strOrNull(header, "sysTime")
        val userAgent  = strOrNull(header, "userAgent")

        val body = json.getJSONArray("body")
        (0 until body.size()).map { i =>
          val obj = body.getJSONObject(i) // body element at index i
          (traceId, appName, deviceType, version, ip, sysTime, userAgent,
            strOrNull(obj, "txnType"),
            strOrNull(obj, "txnCode"),
            strOrNull(obj, "txnSubCode"),
            strOrNull(obj, "refCode"),
            strOrNull(obj, "txnValue"),
            strOrNull(obj, "event"),
            // Bug fix: the payload field is "subEvent" (see the sample JSON),
            // not "event_track" — the old key made this column always "null".
            strOrNull(obj, "subEvent"),
            strOrNull(obj, "userId"),
            strOrNull(obj, "extend"))
        }
      }.toDF("traceId", "appName", "deviceType", "version", "ip", "sysTime", "userAgent",
          "txnType", "txnCode", "txnSubCode", "refCode", "txnValue", "event", "subEvent",
          "userId", "extend")
        .show()
    }

    ssc.start()
    ssc.awaitTermination()
  }

}

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值