import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.{Callable, Executors, FutureTask}

import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import com.fengtu.sparktest.aoi.GetGjAoiInterface2
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

/**
 * AOI-accuracy evaluation job.
 *
 * For every city code listed in an HDFS CSV, reads that city's raw GPS
 * trajectory points for one partition day, posts them (grouped per
 * courier/waybill) to the trajectory AOI-matching HTTP interface, joins the
 * interface responses back onto the original points, and appends the result
 * into dm_gis.aoi_accuracy_gj_detail_new partitioned by (inc_day, citycode).
 * Cities are processed concurrently on a fixed pool of 6 driver threads.
 */
object AoiAccuracyInterfaceTest7 {

  @transient lazy val logger: Logger = Logger.getLogger(AoiAccuracyInterfaceTest7.getClass)

  val appName: String = this.getClass.getSimpleName.replace("$", "")

  // NOTE(review): the endpoint looks redacted in the original source — the
  // string literal was left unterminated, which does not compile. It is closed
  // here so the file is syntactically valid; fill in the real URL before use.
  private val gjUrl = "http://"

  /**
   * Entry point.
   *
   * @param args args(0) is the Hive partition day to process, e.g. "20201107".
   */
  def main(args: Array[String]): Unit = {
    val inc_day = args(0)
    start(inc_day)
  }

  /**
   * Builds the SparkSession, reads the list of city codes to evaluate and
   * runs [[startSta]] for each city on a 6-thread pool.
   *
   * @param inc_day partition day to process (yyyyMMdd)
   */
  def start(inc_day: String): Unit = {
    val spark = SparkSession
      .builder()
      .appName("SparkDecode")
      .master("yarn")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition", true)
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    val service = Executors.newFixedThreadPool(6)
    val list = new java.util.ArrayList[Callable[Void]]()

    // First CSV column holds the city code; one Callable per city.
    spark.read.format("csv")
      .option("header", "true")
      .load("/user/01401062/upload/gis/project/aoi/aoiAccuracy/areaAddrTest.csv")
      .rdd
      .map(row => row.getString(0))
      .collect()
      .foreach { citycode =>
        println(citycode)
        list.add(new Callable[Void] {
          override def call(): Void = {
            startSta(spark, inc_day, citycode)
            null
          }
        })
      }

    // invokeAll blocks until every city task finishes. BUGFIX: the original
    // discarded the returned futures, so a task exception was silently
    // swallowed; calling get() re-throws the first failure on the driver.
    val futures = service.invokeAll(list)
    for (i <- 0 until futures.size()) futures.get(i).get()
    service.shutdown()
    logger.error("统计完毕")
    spark.stop()
  }

  /** One matched track point as returned by the AOI interface. */
  case class res(
    un: String,
    bn: String,
    inc_day_gj: String,
    fix: String,
    fixX: String,
    fixY: String,
    aoiid: String,
    aoiname: String,
    fatype: String,
    faname: String,
    interval: String,
    dis: String,
    speed: String,
    tm: String,
    t_time: String,
    tp: String,
    aoicode: String,
    response: String,
    input: String
  )

  // NOTE(review): re2 is not referenced anywhere in this file; kept because it
  // is a public member that external code may use.
  case class re2(
    un: String,
    tp: String,
    zx: String,
    zy: String,
    ac: String,
    tm: String,
    t_time: String,
    inc_day: String,
    id: String,
    bt: String,
    sc: String
  )

  /** Interface response joined back onto the raw trajectory point. */
  case class resAll(
    un: String,
    tp: String,
    zx: String,
    zy: String,
    ac: String,
    tm: String,
    bn: String,
    t_time: String,
    inc_day_gj: String,
    fix: String,
    fixX: String,
    fixY: String,
    aoiid: String,
    aoiname: String,
    fatype: String,
    faname: String,
    interval: String,
    dis: String,
    speed: String,
    aoicode: String,
    response: String,
    input: String
  )

  /**
   * Processes one city: drops the target partition, loads the day's
   * trajectory points, calls the AOI interface per (un, bn) group, joins the
   * responses back onto the points and writes them to Hive.
   *
   * @param spark    shared SparkSession (thread-safe; called from the pool in [[start]])
   * @param inc_day  partition day (yyyyMMdd)
   * @param citycode city code derived from the waybill prefix
   */
  def startSta(spark: SparkSession, inc_day: String, citycode: String): Unit = {
    // BUGFIX: the original obtained "now" via
    // new SimpleDateFormat(...).getCalendar.getTimeInMillis, which only works
    // by accident (the format's calendar happens to be initialised to the
    // current time). System.currentTimeMillis() is the direct equivalent.
    val dataStartTime = System.currentTimeMillis()
    val tsFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")

    // Make the write idempotent: drop this (day, city) partition first.
    val delSql =
      s"""
         |alter table dm_gis.aoi_accuracy_gj_detail_new drop if exists partition (inc_day='${inc_day}',citycode='${citycode}')
      """.stripMargin
    spark.sql(delSql)
    logger.error("删除分区:" + delSql)

    val trajSql =
      s"""
         |select un,bn,tp,zx,zy,ac,tm,from_unixtime(cast(tm as int), 'yyyy/MM/dd HH:mm:ss') as t_time,inc_day,id,bt,sc,regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') citycode
         | from
         |dm_gis.esg_gis_loc_trajectory
         |where
         | inc_day ='${inc_day}'
         |and
         | regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') = '${citycode}'
         |and
         | ak =1
      """.stripMargin
    logger.error("轨迹sql为" + trajSql)

    val trajCalPre = spark.sql(trajSql).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("输入数据量为" + trajCalPre.count())
    trajCalPre.take(2).foreach(println(_))

    // Row -> JSONObject keyed by column name (column order matches trajSql).
    val trajCalPre2 = trajCalPre.rdd.map { row =>
      val jo = new JSONObject()
      val names = Array("un", "bn", "tp", "zx", "zy", "ac", "tm", "t_time", "inc_day", "id", "bt", "sc", "citycode")
      for (i <- names.indices) jo.put(names(i), row.getString(i))
      jo
    }.persist(StorageLevel.MEMORY_AND_DISK_SER)
    trajCalPre2.take(4).foreach(println(_))
    logger.error("输入数据量为" + trajCalPre2.count())
    trajCalPre2.take(2).foreach(println(_))

    // Group points per (un, bn, inc_day, citycode) into one request payload
    // whose "tracks" array is sorted by timestamp.
    val trajCal = trajCalPre2.map { jo =>
      ((jo.getString("un"), jo.getString("bn"), jo.getString("inc_day"), jo.getString("citycode")), jo)
    }.groupByKey().map { case ((un, bn, day, cc), points) =>
      val sorted = points.toList.sortBy(_.getString("tm"))
      val joArray = new JSONArray
      for (p <- sorted) {
        val track = new JSONObject()
        track.put("x", p.getDouble("zx"))
        track.put("y", p.getDouble("zy"))
        track.put("ac", p.getString("ac"))
        track.put("tm", p.getBigInteger("tm"))
        track.put("t_time", p.getString("t_time"))
        track.put("tp", p.getBigInteger("tp"))
        joArray.add(track)
      }
      val json = new JSONObject()
      json.put("un", un)
      json.put("bn", bn)
      json.put("inc_day", day)
      json.put("citycode", cc)
      json.put("tracks", joArray)
      json
    }.persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("处理后的轨迹的输入量为" + trajCal.count())

    val gjUrlBroadcast = spark.sparkContext.broadcast(gjUrl)
    import spark.implicits._

    val callStartTime = System.currentTimeMillis()
    val dataReadyTime = ((callStartTime - dataStartTime) / 1000) / 60.0
    logger.error(citycode + "城市数据准备耗时为:" + dataReadyTime.formatted("%.2f"))
    logger.error(citycode + "开始调用轨迹端口时间为:" + tsFormat.format(new Date))

    // Call the interface once per payload; a well-formed response expands to
    // one `res` row per track, otherwise a single error row carrying the raw
    // response ("Empty" when blank) and the original input for debugging.
    val traj_res = trajCal.repartition(20).flatMap { payload =>
      val url = gjUrlBroadcast.value
      val json = GetGjAoiInterface2.getContent(url, payload)
      val buffer = new ArrayBuffer[res]
      if (StringUtils.isNotEmpty(json) && json.contains("tracks") && json.contains("inc_day")) {
        val jo = JSON.parseObject(json)
        val un = jo.getString("un")
        val bn = jo.getString("bn")
        val inc_day_gj = jo.getString("inc_day")
        val tracks = jo.getJSONArray("tracks")
        for (i <- 0 until tracks.size()) {
          val t = tracks.getJSONObject(i)
          buffer.append(res(
            un, bn, inc_day_gj,
            t.getString("fix"), t.getString("fixX"), t.getString("fixY"),
            t.getString("aoiid"), t.getString("aoiname"), t.getString("fatype"), t.getString("faname"),
            t.getString("interval"), t.getString("dis"), t.getString("speed"),
            t.getString("tm"), t.getString("t_time"), t.getString("tp"),
            t.getString("aoicode"), "Correct", ""))
        }
      } else {
        val un = payload.getString("un")
        val bn = payload.getString("bn")
        val inc_day_gj = payload.getString("inc_day")
        val response = if (StringUtils.isEmpty(json)) "Empty" else json
        buffer.append(res(un, bn, inc_day_gj,
          "", "", "", "", "", "", "", "", "", "", "", "", "", "",
          response, payload.toJSONString))
      }
      buffer
    }.distinct().toDF()
    logger.error("traj_res数量为:" + traj_res.count())

    // Re-key the raw points so responses can be joined back per point.
    val trajCalPre3 = trajCalPre2.map { jo =>
      ((jo.getString("un"), jo.getString("bn"), jo.getString("inc_day"), jo.getString("t_time"), jo.getString("tp")), jo)
    }.distinct()

    val resAllDf = traj_res.rdd.map { row =>
      val jo = new JSONObject()
      val names = Array("un", "bn", "inc_day_gj", "fix", "fixX", "fixY", "aoiid", "aoiname", "fatype", "faname",
        "interval", "dis", "speed", "tm", "t_time", "tp", "aoicode", "response", "input")
      for (i <- names.indices) jo.put(names(i), row.getString(i))
      // Key mirrors trajCalPre3: (un, bn, inc_day, t_time, tp).
      ((row.getString(0), row.getString(1), row.getString(2), row.getString(14), row.getString(15)), jo)
    }.leftOuterJoin(trajCalPre3).map { case (_, (left, rightOpt)) =>
      // Unmatched responses keep empty zx/zy/ac (getString on a missing key
      // returns null, matching the original left-outer-join semantics).
      val right = rightOpt.getOrElse(new JSONObject())
      resAll(
        left.getString("un"), left.getString("tp"), right.getString("zx"), right.getString("zy"),
        right.getString("ac"), left.getString("tm"), left.getString("bn"), left.getString("t_time"),
        left.getString("inc_day_gj"), left.getString("fix"), left.getString("fixX"), left.getString("fixY"),
        left.getString("aoiid"), left.getString("aoiname"), left.getString("fatype"), left.getString("faname"),
        left.getString("interval"), left.getString("dis"), left.getString("speed"), left.getString("aoicode"),
        left.getString("response"), left.getString("input"))
    }.distinct().toDF().persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error(citycode + "调用接口返回的数据量为:" + resAllDf.count())

    val callEndTime = System.currentTimeMillis()
    val timeDiff = ((callEndTime - callStartTime) / 1000) / 60.0
    logger.error(citycode + "调用轨迹端口结束时间为:" + tsFormat.format(new Date))
    logger.error(citycode + "城市端口调用时间为:" + timeDiff.formatted("%.2f"))
    trajCalPre.unpersist()
    resAllDf.take(2).foreach(println(_))

    // Target ~500k rows per output file. BUGFIX: the original divided two
    // Longs before ceil (integer division), so ceil was a no-op and e.g.
    // 600k rows yielded 1 partition instead of 2.
    var numPartiton = scala.math.ceil(resAllDf.count() / 500000.0).toInt
    if (numPartiton <= 1) numPartiton = 5
    logger.error(citycode + "城市分区数为:" + numPartiton)

    resAllDf
      .withColumn("inc_day", lit(inc_day))
      .withColumn("citycode", lit(citycode))
      .repartition(numPartiton)
      .write.mode(SaveMode.Append)
      .insertInto("dm_gis.aoi_accuracy_gj_detail_new")

    resAllDf.unpersist()
    // FIX: these two cached RDDs were never released in the original.
    trajCalPre2.unpersist()
    trajCal.unpersist()
  }
}
// 2021-06-25
// (web-scrape residue, kept as comment) Latest recommended article published 2024-08-11 02:57:18