A Spark multithreading example
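
This example runs several Spark jobs concurrently from the driver: a fixed thread pool submits one Callable per city code, and each Callable calls startSta against the same shared SparkSession, so the per-city trajectory processing for a given day (inc_day) runs in parallel instead of sequentially.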

package com.fengtu.sparktest.aoiTest

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.{Callable, Executors, FutureTask}

import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import com.fengtu.sparktest.aoi.GetGjAoiInterface2
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

object AoiAccuracyInterfaceTest7 {
  @transient lazy val logger: Logger = Logger.getLogger(AoiAccuracyInterfaceTest7.getClass)
  val appName: String = this.getClass.getSimpleName.replace("$", "")
  def main(args: Array[String]): Unit = {
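    // args(0) is the partition day to process, e.g. "20201107"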
    //val inc_day ="20201107"
    //val str = GetGjAoiInterface.getContent("http://100.118.72.24:8088/adjusttrack","{\"inc_day\":\"20201107\",\"un\":\"40398702\",\"bn\":\"571FE\",\"tp\":\"20201107\",\"tracks\":[{\"ac\":\"12\",\"x\":\"120.488271\",\"y\":\"30.256971\",\"tm\":\"1604716191\",\"t_time\":\"2020/11/7 10:29\"},{\"ac\":\"13\",\"x\":\"120.488068\",\"y\":\"30.256066\",\"tm\":\"1604716311\",\"t_time\":\"2020/11/7 10:31\"},{\"ac\":\"30\",\"x\":\"120.493522\",\"y\":\"30.260036\",\"tm\":\"1604737005\",\"t_time\":\"2020/11/7 16:16\"},{\"ac\":\"7\",\"x\":\"120.493521\",\"y\":\"30.259342\",\"tm\":\"1604737041\",\"t_time\":\"2020/11/7 16:17\"},{\"ac\":\"12\",\"x\":\"120.492902\",\"y\":\"30.2581\",\"tm\":\"1604737084\",\"t_time\":\"2020/11/7 16:18\"}]}")
    val inc_day = args(0)
    //val inc_day = "20201129"
    start(inc_day)
  }

  def start(inc_day: String): Unit = {
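    // One shared SparkSession for all worker threads; dynamic partitioning is enabled because
    // each city writes its own (inc_day, citycode) partition via insertInto.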
    val spark = SparkSession
      .builder()
      .appName("SparkDecode")
      .master("yarn")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition",true)
      .config("hive.exec.dynamic.partition.mode","nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
  //  val array = Array("20201213","20201214")


    val service = Executors.newFixedThreadPool(6)
    val list = new java.util.ArrayList[Callable[Void]]()
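    // Read the list of city codes from HDFS and wrap one startSta call per city in a Callable;
    // invokeAll below blocks until every city has been processed.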

    spark.read.format("csv").option("header", "true")
      .load("/user/01401062/upload/gis/project/aoi/aoiAccuracy/areaAddrTest.csv")
      .rdd.map(_.getString(0))
      .collect()
      .foreach(citycode => {
        println(citycode)
        list.add(new Callable[Void] {
          override def call(): Void = {
            startSta(spark, inc_day, citycode)
            null
          }
        })
      })


    service.invokeAll(list)
    service.shutdown()


    logger.error("统计完毕")
    spark.stop()

  }

  //private val gjUrl ="http://10.216.162.129:8099/adjusttrack"
  private val gjUrl ="http://10.216.22.56:8099/adjusttrack"
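  // res: one adjusted track point as returned by the service; resAll: res joined back to the
  // raw point (zx, zy, ac); re2 is declared but not used in this example.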

  case class res(
                  un:String,
                  bn:String,
                  inc_day_gj:String,
                  fix:String,
                  fixX:String,
                  fixY:String,
                  aoiid:String,
                  aoiname:String,
                  fatype:String,
                  faname:String,
                  interval:String,
                  dis:String,
                  speed:String,
                  tm:String,
                  t_time:String,
                  tp:String,
                  aoicode:String,
                  response:String,
                  input:String
                )

  case class re2(
                  un:String,
                  tp:String,
                  zx:String,
                  zy:String,
                  ac:String,
                  tm:String,
                  t_time:String,
                  inc_day:String,
                  id:String,
                  bt:String,
                  sc:String
                )

  case class resAll(
                     un:String,
                     tp:String,
                     zx:String,
                     zy:String,
                     ac:String,
                     tm:String,
                     bn:String,
                     t_time:String,
                     inc_day_gj:String,
                     fix:String,
                     fixX:String,
                     fixY:String,
                     aoiid:String,
                     aoiname:String,
                     fatype:String,
                     faname:String,
                     interval:String,
                     dis:String,
                     speed:String,
                     aoicode:String,
                     response:String,
                     input:String
                   )


  def startSta(spark:SparkSession,inc_day:String,citycode:String): Unit ={
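    // Per-city job: drop the target partition, load the day's raw trajectories, assemble one
    // request per (un, bn) track, call the adjusttrack service, join the response back to the
    // raw points, and write the result into dm_gis.aoi_accuracy_gj_detail_new.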

    val dataStartTime = System.currentTimeMillis()
    val delSql =
      s"""
         |alter table dm_gis.aoi_accuracy_gj_detail_new drop if exists partition (inc_day='${inc_day}',citycode='${citycode}')
           """.stripMargin
    spark.sql(delSql)
    logger.error("删除分区:" + delSql)

    val trajSql =
      s"""
         |select  un,bn,tp,zx,zy,ac,tm,from_unixtime(cast(tm as int), 'yyyy/MM/dd HH:mm:ss') as t_time,inc_day,id,bt,sc,regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') citycode
         |  from
         |dm_gis.esg_gis_loc_trajectory
         |where
         | inc_day ='${inc_day}'
         |and
         |  regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') = '${citycode}'
         |and
         |  ak =1
             """.stripMargin
    logger.error("Trajectory SQL: " + trajSql)
    val trajCalPre = spark.sql(trajSql).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("输入数据量为" + trajCalPre.count())
    trajCalPre.take(2).foreach(println(_))
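    // Convert each Row into a JSONObject keyed by column name (every column is accessed as a string).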

    val trajCalPre2 = trajCalPre.rdd.map(x => {
      val jo = new JSONObject()
      val names =  Array("un","bn","tp","zx","zy","ac","tm","t_time","inc_day","id","bt","sc","citycode")
      for(i <- names.indices) jo.put(names(i),x.getString(i))
      jo
    }).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("Rows after JSON conversion: " + trajCalPre2.count())
    trajCalPre2.take(2).foreach(println(_))
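    // Group points by (un, bn, inc_day, citycode), sort each group by tm, and assemble the
    // request JSON ("un", "bn", "inc_day", "citycode", "tracks") expected by the adjusttrack service.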

    val trajCal =trajCalPre2.map(x =>{
      val un = x.getString("un")
      val bn = x.getString("bn")
      val inc_day = x.getString("inc_day")
      val citycode = x.getString("citycode")

      ((un,bn,inc_day,citycode),x)
    }).groupByKey().map( x=> {
      val list = x._2.toList.sortBy(_.getString("tm"))
      val joArray = new JSONArray
      val un = x._1._1
      val bn = x._1._2
      val inc_day =x._1._3
      val citycode = x._1._4

      for (i <- 0 until(list.length)) {
        val jo = new JSONObject()
        val zx = list(i).getDouble("zx")
        val zy = list(i).getDouble("zy")
        val ac=  list(i).getString("ac")
        val tm = list(i).getBigInteger("tm")
        val tp = list(i).getBigInteger("tp")
        val t_time = list(i).getString("t_time")
        jo.put("x",zx)
        jo.put("y",zy)
        jo.put("ac",ac)
        jo.put("tm",tm)
        jo.put("t_time",t_time)
        jo.put("tp",tp)
        joArray.add(jo)
      }

      val json = new JSONObject()

      json.put("un",un)
      json.put("bn",bn)
      json.put("inc_day",inc_day)
      json.put("citycode",citycode)
      json.put("tracks",joArray)

      json
    }).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("处理后的轨迹的输入量为" + trajCal.count())
    val gjUrlBroadcast= spark.sparkContext.broadcast(gjUrl)
    import spark.implicits._

    val callStartTime = System.currentTimeMillis()
    val dataReadyTime =  ((callStartTime - dataStartTime) /1000) / 60.0

    logger.error(citycode + "城市数据准备耗时为:" + dataReadyTime.formatted("%.2f"))
    logger.error(citycode + "开始调用轨迹端口时间为:" +  new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date))


    val traj_res = trajCal.repartition(20).flatMap( x => {
      val url = gjUrlBroadcast.value
      val json = GetGjAoiInterface2.getContent(url,x)
      val buffer = new ArrayBuffer[res]
      if(StringUtils.isNotEmpty(json) && json.contains("tracks") && json.contains("inc_day")){
          val jo = JSON.parseObject(json)
          val un = jo.getString("un")
          val bn = jo.getString("bn")
          val inc_day_gj = jo.getString("inc_day")
          val array = jo.getJSONArray("tracks")
          for(i <- 0 until(array.size())){
            val jo1 = array.getJSONObject(i)
            val fix = jo1.getString("fix")
            val fixX = jo1.getString("fixX")
            val fixY = jo1.getString("fixY")
            val aoiid = jo1.getString("aoiid")
            val aoiname = jo1.getString("aoiname")
            val fatype = jo1.getString("fatype")
            val faname = jo1.getString("faname")
            val interval = jo1.getString("interval")
            val dis = jo1.getString("dis")
            val speed = jo1.getString("speed")
            val aoicode = jo1.getString("aoicode")

            val tm = jo1.getString("tm")
            val t_time = jo1.getString("t_time")
            val tp = jo1.getString("tp")
            buffer.append(res(un,bn,inc_day_gj,fix,fixX,fixY,aoiid,aoiname,fatype,faname,interval,dis,speed,tm,t_time,tp,aoicode,"Correct",""))
        }
      }else{
        val un = x.getString("un")
        val bn = x.getString("bn")
        val inc_day_gj = x.getString("inc_day")
        if(StringUtils.isEmpty(json)){
          buffer.append(res(un,bn,inc_day_gj,"","","","","","","","","","","","","","","Empty",x.toJSONString))
        }else{
          buffer.append(res(un,bn,inc_day_gj,"","","","","","","","","","","","","","",json,x.toJSONString))
        }
      }
      buffer
    }).distinct().toDF()
    logger.error("traj_res数量为:"+ traj_res.count())



    val trajCalPre3 = trajCalPre2.map(
      x =>{
        val un = x.getString("un")
        val bn = x.getString("bn")
        val inc_day = x.getString("inc_day")
        val t_time = x.getString("t_time")
        val tp = x.getString("tp")
        ((un,bn,inc_day,t_time,tp),x)
      }).distinct()
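    // Left-join the service output with the raw points on (un, bn, inc_day, t_time, tp) to
    // recover the original zx/zy/ac for each adjusted point.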


    val resAllDf = traj_res.rdd.map(x => {
      val jo = new JSONObject()
      val names =  Array("un","bn","inc_day_gj","fix","fixX","fixY","aoiid","aoiname","fatype","faname","interval","dis","speed","tm","t_time","tp","aoicode","response","input")
      for(i <- names.indices) jo.put(names(i),x.getString(i))
      val un = x.getString(0)
      val bn = x.getString(1)
      val inc_day_gj = x.getString(2)
      val tm = x.getString(13)
      val t_time =x.getString(14)
      val tp = x.getString(15)
      ((un,bn,inc_day_gj,t_time,tp),jo)
    }).leftOuterJoin(trajCalPre3).map(x => {
      val leftBody = x._2._1
      val rightBody = x._2._2
      val un =leftBody.getString("un")
      val bn =leftBody.getString("bn")
      val inc_day_gj =leftBody.getString("inc_day_gj")
      val fix =leftBody.getString("fix")
      val fixX =leftBody.getString("fixX")
      val fixY =leftBody.getString("fixY")
      val aoiid =leftBody.getString("aoiid")
      val aoiname =leftBody.getString("aoiname")
      val fatype =leftBody.getString("fatype")
      val faname =leftBody.getString("faname")
      val interval =leftBody.getString("interval")
      val dis =leftBody.getString("dis")
      val speed =leftBody.getString("speed")
      val tm =leftBody.getString("tm")
      val t_time =leftBody.getString("t_time")
      val tp =leftBody.getString("tp")
      val aoicode =leftBody.getString("aoicode")
      val response =leftBody.getString("response")
      val input =leftBody.getString("input")

      val rightBodyNew = rightBody.getOrElse(new JSONObject())
      val zx = rightBodyNew.getString("zx")
      val zy = rightBodyNew.getString("zy")
      val ac = rightBodyNew.getString("ac")

      resAll(un,tp,zx,zy,ac,tm,bn,t_time,inc_day_gj,fix,fixX,fixY,aoiid,aoiname,fatype,faname,interval,dis,speed,aoicode,response,input)

    }).distinct().toDF().persist(StorageLevel.MEMORY_AND_DISK_SER)

    logger.error(citycode +"调用接口返回的数据量为:" + resAllDf.count())

    val callEndTime = System.currentTimeMillis()
    val timeDiff = ((callEndTime - callStartTime) / 1000 ) / 60.0
    logger.error(citycode + "调用轨迹端口结束时间为:" + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date))
    logger.error(citycode + "城市端口调用时间为:" + timeDiff.formatted("%.2f") )
    trajCalPre.unpersist()
    resAllDf.take(2).foreach(println(_))
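    // Size the output at roughly 500k rows per partition, falling back to 5 partitions when the
    // computed value is 1 or less.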

    var numPartiton = scala.math.ceil(resAllDf.count() / 500000.0).toInt
    if (numPartiton <= 1)
      numPartiton = 5
    logger.error(citycode + " output partition count: " + numPartiton)
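    // insertInto resolves columns by position, so inc_day and citycode are appended last to match
    // the table's partition columns.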

    resAllDf
      .withColumn("inc_day", lit(inc_day))
      .withColumn("citycode", lit(citycode))
      .repartition(numPartiton)
      .write.mode(SaveMode.Append)
      .insertInto("dm_gis.aoi_accuracy_gj_detail_new")
    resAllDf.unpersist()


  }



}