spark业务案例

package com.fengtu.sparktest.aoiTest

import java.net.URLEncoder

import com.alibaba.fastjson.serializer.SerializerFeature
import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import com.fengtu.sparktest.utils.SparkUtils
import com.fengtu.sparktest.utils2.Utils
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.storage.StorageLevel

/*
*aoi真实准确率-判错工艺压审补
*/
object AoiRealAccturyRateJudgeWrong2Review {
  // Application name: this object's simple class name with every "$" removed.
  val appName: String = this.getClass.getSimpleName.replace("$", "")
  // Logger shared by every method of this job.
  val logger: Logger = Logger.getLogger(appName)
  // Turn off fastjson circular-reference detection for serialization.
  JSON.DEFAULT_GENERATE_FEATURE |= SerializerFeature.DisableCircularReferenceDetect.getMask
  // Passed to SparkUtils.getRowToJson when the source table is loaded
  // (presumably the number of partitions — confirm against SparkUtils).
  val calPartition = 2000

  // Map-provider geocoding endpoint; format arguments are: opt, URL-encoded address, city.
  val tsUrl = "http://gis-int.int.sfdc.com.cn:1080/geo/api?ak=3a191e7427e8470c86271a069411c66b&opt=%s&address=%s&city=%s"

  // Endpoint that attaches AOIs to a list of coordinates.
  var getTrackAoisUrl = "http://10.119.72.206:9000/data/aoiSdk/getTrackAois?showAoiCode=1".stripMargin

  // Review/supplement endpoint; format arguments are: cityCode, address, znoCode, aoiId.
  val reviewUrl = "http://gis-cms-bg.sf-express.com/cms/api/address/rgsbAdd?cityCode=%s&address=%s&znoCode=%s&aoiId=%s&operSource=真实AOI准确率"
  // Per-partition request count (60000 / 60 = 1000) that processReviewData compares
  // against before deciding whether to sleep out the rest of the current minute.
  val limitMin = 60000 / 60

  /**
   * Job entry point: builds the Hive-enabled Spark session and runs the pipeline
   * for one partition day.
   *
   * @param args command-line arguments; `args(0)` is the `inc_day` partition to process
   * @throws IllegalArgumentException if the partition day argument is missing or empty
   */
  def main(args:Array[String]): Unit = {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException
    // that would only surface after the Spark session has been started.
    if (args == null || args.isEmpty || StringUtils.isEmpty(args(0)))
      throw new IllegalArgumentException(s"Usage: $appName <inc_day>")

    val incDay = args(0)

    val spark = SparkSession.builder()
      // Use this job's own name rather than the copy-pasted "SparkDecode".
      .appName(appName)
      .master("yarn")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition",true)
      .config("hive.exec.dynamic.partition.mode","nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel( "ERROR" )

    run(spark,incDay)
  }

  /**
   * Runs the whole pipeline for one partition day: load the source records,
   * resolve AOI ids, tag each record with a review decision, build the review
   * requests and append the result to Hive.
   *
   * @param spark  active Spark session
   * @param incDay partition day to process
   */
  def run(spark:SparkSession,incDay:String): Unit = {
    // Source records, the distinct AOI codes they mention, and the per-address r_aoi frequency.
    val (sourceRdd, aoiCodes, addrFreqBc) = getLastFlowData(spark, incDay)

    // AOI code -> AOI id lookup.
    val codeToIdBc = getAoicodeIdMap(spark, aoiCodes)

    // Fill r_aoi_id / finalaoiid on every record.
    val enrichedRdd = processAoicode(codeToIdBc, sourceRdd)
    logger.error("aoiInfoTmpRdd的数据总量为:" + enrichedRdd.count())
    enrichedRdd.take(2).foreach(println(_))

    // AOI -> AOI area lookup.
    // NOTE(review): the AOI *code* list is passed here, while getAoiAreaMap filters
    // on aoi_id and the map is later queried by AOI id — confirm this is intended.
    val aoiAreaBc = getAoiAreaMap(spark, aoiCodes)

    // Decide, per record, whether it goes to review.
    val taggedRdd = processReviewData(enrichedRdd, aoiAreaBc, addrFreqBc)

    codeToIdBc.unpersist()
    addrFreqBc.unpersist()

    // Build the review requests and convert the records to rows.
    val rowsToSave = doReview(taggedRdd)

    // Append to the target Hive table.
    SparkUtils.df2Hive(
      spark, rowsToSave, schema, "append",
      "dm_gis.aoi_real_acctury_rate_judge_wrong_2_review", "inc_day", incDay, logger
    )
  }

  // Schema of the output table: every column is a nullable string, in the same
  // order in which doReview fills the rows.
  val schema = StructType(
    List(
      "waybillno", "city_code", "org_code", "userid", "operatime_new",
      "delivery_lgt", "delivery_lat",
      "req_waybillno", "req_destcitycode", "req_addresseeaddr", "req_comp_name",
      "finalzc", "finalaoicode", "gisaoisrc", "tag1", "tag2",
      "r_aoi", "r_aoi_id", "r_aoi_area",
      "key_word", "key_tag",
      "mapa_aoiid", "mapa_aoicode", "mapa_aoiname",
      "gd_coor_req", "gd_coor_resp", "gd_aoiid", "gd_aoicode", "gd_aoiname",
      "gis_aoi_code", "gis_aoi_name", "80_aoi_code", "80_aoi_name", "gis_aoi_area",
      "addr_r_aoi_freq", "tag", "step",
      "bd_coor_req", "bd_coor_resp", "bd_aoiid", "bd_aoicode", "bd_aoiname",
      "tc_coor_req", "tc_coor_resp", "tc_aoiid", "tc_aoicode", "tc_aoiname",
      "coor_aoi_req", "coor_aoi_resp",
      "review_req", "review_resp",
      "inc_day"
    ).map(column => StructField(column, StringType, true))
  )

  /**
   * Loads the records judged "wrong" (tag1 = 'wrong') for one partition day from
   * dm_gis.aoi_real_acc_rate_9 and derives two lookups from them.
   *
   * The DataFrame is persisted here and is not unpersisted by this method,
   * because the returned RDD is still computed from it by the caller.
   *
   * @param spark  active Spark session
   * @param incDay partition day to load
   * @return a triple of
   *         (records as JSON objects,
   *          distinct non-empty AOI codes found in `r_aoi` and `finalaoicode`,
   *          broadcast map: address -> number of distinct `r_aoi` values seen for it)
   */
  def getLastFlowData( spark:SparkSession,incDay:String ): (RDD[JSONObject], List[String], Broadcast[Map[String, Int]]) = {
    logger.error("开始获取判错运营工艺数据")

    val querySql =
      s"""
         |select
         |  *
         |from
         |  dm_gis.aoi_real_acc_rate_9
         |where inc_day= '$incDay' and tag1='wrong'
         |""".stripMargin

    logger.error(querySql)

    val df = spark.sql(querySql).persist(StorageLevel.MEMORY_AND_DISK_SER)

    val sourRdd = SparkUtils.getRowToJson(df,calPartition)

    // Distinct AOI codes that need an id / area lookup.
    val aoicodeList = sourRdd.flatMap(obj => Array(obj.getString("r_aoi"),obj.getString("finalaoicode")))
      .filter( aoicode => StringUtils.isNotEmpty(aoicode) ).distinct().collect().toList

    // Number of distinct r_aoi values per address.
    // De-duplicating the (address, r_aoi) pairs first and then summing with reduceByKey
    // gives the same counts as groupByKey + distinct, without materializing every
    // value of an address in memory on a single executor.
    val addrRaoifrequencyMap = sourRdd
      .map(obj => ( obj.getString("req_addresseeaddr"),obj.getString("r_aoi") ))
      .filter( pair => StringUtils.isNotEmpty(pair._1) )
      .distinct()
      .mapValues(_ => 1)
      .reduceByKey(_ + _)
      .collect().toMap

    ( sourRdd,aoicodeList,spark.sparkContext.broadcast( addrRaoifrequencyMap ))
  }

  /**
   * Reads (aoi_id, aoi_area_code) pairs from dm_tc_waybillinfo.aoi_area_aoi and
   * broadcasts them as a map.
   *
   * The query is restricted with an `in (...)` filter only when the list holds
   * between 1 and 99999 entries; otherwise the whole table is read.
   *
   * NOTE(review): the filter is applied to the `aoi_id` column, but the caller in
   * this file passes AOI codes — confirm which identifier is intended.
   *
   * @param spark       active Spark session
   * @param aoicodeList values used for the `aoi_id in (...)` filter
   * @return broadcast map aoi_id -> aoi_area_code
   */
  def getAoiAreaMap( spark:SparkSession,aoicodeList:List[String] ) = {
    logger.error("开始获取aoiid对应的aoi区域编码")

    val baseSql =
      s"""
         |select
         | aoi_id,
         | aoi_area_code
         |from
         |dm_tc_waybillinfo.aoi_area_aoi
         |""".stripMargin

    val useFilter = aoicodeList.nonEmpty && aoicodeList.size < 100000

    val querySql =
      if (useFilter)
        s"""
           |$baseSql
           |where aoi_id in ('${aoicodeList.mkString("','")}')
           |""".stripMargin
      else
        baseSql

    logger.error(querySql)

    val aoiAreaMap = spark.sql(querySql).rdd
      .map(row => row.getString(0) -> row.getString(1))
      .collect()
      .toMap

    logger.error(s"共获取aoi区域:${aoiAreaMap.size}")

    spark.sparkContext.broadcast(aoiAreaMap)
  }


  /**
   * Tags every record with a review decision (`tag`, and `step` for rejections).
   *
   * Decision flow per record:
   *  - If the AOI area of `finalaoiid` equals the AOI area of `r_aoi_id`:
   *    - same truncated AOI code on both sides -> `no_chkn`
   *    - otherwise `no_chkn` / step 1 when the source is normcompany/normhp, the
   *      address has at least 2 distinct r_aoi values, or the address ends with one
   *      of the area keywords; else `ok_chkn`.
   *  - Otherwise the address is geocoded through the map providers and the
   *    coordinates are attached to AOIs:
   *    - gd/bd/tc AOI ids all equal and non-empty -> `ts_chkn`, or `no_chkn` / step 3
   *      for normcompany/normhp sources and area-keyword addresses
   *    - anything else -> `no_chkn` / step 2.
   *
   * Map-provider calls are throttled per partition: once `limitMin` calls have been
   * made, the partition sleeps for whatever is left of the current minute.
   *
   * The records are mutated in place and the resulting RDD is persisted on disk.
   *
   * @param aoiInfoRdd                    records that already carry r_aoi_id / finalaoiid
   * @param aoiAreaMapBroadCast           AOI id -> AOI area code
   * @param addrRaoifrequencyMapBroadCast address -> number of distinct r_aoi values
   * @return the tagged records
   */
  def processReviewData(
                     aoiInfoRdd:RDD[JSONObject],
                     aoiAreaMapBroadCast:Broadcast[Map[String, String]],
                     addrRaoifrequencyMapBroadCast: Broadcast[Map[String, Int]]
                   ): RDD[JSONObject] ={
    logger.error("开始处理要进行审补的数据")

    val reviewRdd = aoiInfoRdd.mapPartitionsWithIndex((index, iter) => {
      val aoiAreaMap = aoiAreaMapBroadCast.value
      val addrRaoifrequencyMap = addrRaoifrequencyMapBroadCast.value

      val trustedSources = Array("normcompany", "normhp")
      val areaKeywordRegex = "(.*村$)|(.*工业区$)|(.*工业园$)|(.*工业园区$)|(.*产业园$)"

      // True when the address ends with one of the area keywords.
      def endsWithAreaKeyword(addr: String): Boolean =
        StringUtils.isNotEmpty(addr) && addr.matches(areaKeywordRegex)

      // Drops the last 6 characters of an AOI code longer than 8 characters;
      // shorter, empty or null codes are returned unchanged.
      def dropAoiSuffix(code: String): String =
        if (StringUtils.isNotEmpty(code) && code.length > 8) code.substring(0, code.length - 6)
        else code

      var startTime = System.currentTimeMillis()
      var cnt = 0

      for (obj <- iter) yield {
        val r_aoi = obj.getString("r_aoi")
        val r_aoi_id = obj.getString("r_aoi_id")
        val finalaoicode = obj.getString("finalaoicode")
        val finalaoiid = obj.getString("finalaoiid")
        val req_addresseeaddr = obj.getString("req_addresseeaddr")
        val gisaoisrc = obj.getString("gisaoisrc")
        val req_destcitycode = obj.getString("req_destcitycode")
        val gd_aoiid = obj.getString("gd_aoiid")

        val r_aoi_areacode = dropAoiSuffix(r_aoi)
        // Each code is truncated by its OWN length. The previous code used
        // r_aoi.length here, which threw on a null r_aoi and could slice
        // finalaoicode at the wrong position or past its end.
        val finalaoicodeNew = dropAoiSuffix(finalaoicode)

        val r_aoi_area = aoiAreaMap.getOrElse(r_aoi_id, "")
        val gis_aoi_area = aoiAreaMap.getOrElse(finalaoiid, "")
        val addrRaoifrequency = addrRaoifrequencyMap.getOrElse(req_addresseeaddr, -1)
        obj.put("r_aoi_area", r_aoi_area)
        obj.put("gis_aoi_area", gis_aoi_area)
        obj.put("addr_r_aoi_freq", addrRaoifrequency)

        // Is the AOI area of finalaoi the same as the AOI area of r_aoi?
        if (StringUtils.isNotEmpty(r_aoi_area) && StringUtils.isNotEmpty(gis_aoi_area) && r_aoi_area.equals(gis_aoi_area)) {

          // Do finalaoi and r_aoi share the same truncated AOI code?
          if (StringUtils.isNotEmpty(finalaoicodeNew) && StringUtils.isNotEmpty(r_aoi_areacode) && r_aoi_areacode.equals(finalaoicodeNew)) {
            obj.put("tag", "no_chkn")
          } else {
            // Rejected when the source is normcompany/normhp, the address maps to
            // >= 2 distinct r_aoi values, or the address ends with an area keyword.
            if (trustedSources.contains(gisaoisrc) || addrRaoifrequency >= 2 || endsWithAreaKeyword(req_addresseeaddr)) {
              obj.put("tag", "no_chkn")
              obj.put("step", "1")
            } else
              obj.put("tag", "ok_chkn")
          }
        } else {
          // Throttle map-provider calls. `cnt` grows by 0..3 per record, so it can
          // step over limitMin; `>=` keeps the throttle from being skipped forever.
          if (cnt >= limitMin) {
            val endTime = System.currentTimeMillis() - startTime
            if (endTime <= 60 * 1000) {
              logger.error(s"分区$index,每分钟访问量超过限制$limitMin,休眠${60 * 1000 - endTime} ms中")
              Thread.sleep(60 * 1000 - endTime)
            }
            startTime = System.currentTimeMillis()
            cnt = 0
          }

          // Geocode the address with the map providers (gd / bd / tc).
          val (resJsonList, trackArr, cntTmp) = getTsCoor(gd_aoiid, obj, req_addresseeaddr, req_destcitycode)
          cnt += cntTmp

          // Attach AOIs to the returned coordinates.
          getAoiByCoor(trackArr, resJsonList, obj, gd_aoiid)

          // gd_aoiid == bd_aoiid == tc_aoiid and gd_aoiid is not empty?
          if (
            StringUtils.isNotEmpty(obj.getString("gd_aoiid")) &&
              obj.getString("gd_aoiid").equals(obj.getString("bd_aoiid")) &&
              obj.getString("bd_aoiid").equals(obj.getString("tc_aoiid"))
          ) {
            if (trustedSources.contains(gisaoisrc) || endsWithAreaKeyword(req_addresseeaddr)) {
              obj.put("tag", "no_chkn")
              obj.put("step", "3")
            } else
              obj.put("tag", "ts_chkn")
          } else {
            obj.put("tag", "no_chkn")
            obj.put("step", "2")
          }
        }

        obj
      }
    }).persist(StorageLevel.DISK_ONLY)

    logger.error(s"共处理数据:${reviewRdd.count()}")

    reviewRdd
  }


  /**
   * Calls the map-provider geocoding endpoint for one address and stores the
   * request and the parsed coordinates on the record.
   *
   * A response is accepted when it contains `"status":0` or `"err":-105`; any other
   * response (or a failed call) is logged and retried. Despite the method name, up
   * to 6 attempts are made, matching the existing loop bound.
   *
   * Side effects on `obj`: `<typeStr>_coor_req` is set to the request URL and
   * `<typeStr>_coor_resp` to the parsed result object.
   *
   * @param obj     record to annotate
   * @param typeStr provider prefix used for the record keys (e.g. "gd", "bd", "tc")
   * @param url     URL template with three `%s` placeholders: opt, address, city
   * @param opt     provider option placed in the URL
   * @param address address to geocode; URL-encoded as UTF-8 (null is sent as empty)
   * @param city    city code placed in the URL
   * @param isSleep when true, sleeps 100 ms after every rejected attempt
   * @return JSON object with `xcoord`, `ycoord`, `precision` and `prefix` (= typeStr)
   */
  def getDataFromUrlRetry3(obj:JSONObject,typeStr:String,url:String,opt:String,address:String,city:String,isSleep:Boolean=false): JSONObject = {
    import scala.util.control.NonFatal

    val maxAttempts = 6
    var icnt = 0
    var resp = ""
    // URLEncoder.encode throws on null, so a missing address is encoded as "".
    val req = String.format ( url,opt,URLEncoder.encode( Option(address).getOrElse(""),"utf-8" ) ,city )

    while ( icnt < maxAttempts &&  StringUtils.isEmpty(resp) ) {
      // A null response or a null exception message is treated as an empty response,
      // so the checks below cannot throw a NullPointerException.
      resp =
        try { Option(Utils.retryGet( req )).getOrElse("") }
        catch { case NonFatal(e) => Option(e.getMessage).getOrElse("") }

      icnt += 1

      if ( ! resp.contains( "\"status\":0")  && ! resp.contains( "\"err\":-105") ) {
        logger.error(s"${req}接口请求异常:\n$resp,正在重试第${icnt}次")
        resp = ""

        if(isSleep)
          Thread.sleep(100)
      }
    }

    val resJson = new JSONObject()
    parseCoor( resp,resJson )

    obj.put(s"${typeStr}_coor_req",req)
    obj.put(s"${typeStr}_coor_resp",resJson)
    resJson.put("prefix",typeStr)

    resJson
  }


  /**
   * Extracts `result.xcoord`, `result.ycoord` and `result.precision` from a
   * geocoding response and stores them on `obj` under the same key names.
   *
   * When the response cannot be parsed or has no `result` object, all three
   * values are stored as empty strings.
   *
   * @param respJsonStr raw JSON response
   * @param obj         object that receives the three values
   * @return `obj`, for chaining
   */
  def parseCoor( respJsonStr:String, obj:JSONObject ): JSONObject ={
    import scala.util.control.NonFatal

    // Parse once instead of once per field; only non-fatal failures are absorbed.
    val result =
      try { JSON.parseObject(respJsonStr).getJSONObject("result") }
      catch { case NonFatal(_) => null }

    def field(key: String): String =
      if (result == null) ""
      else try { result.getString(key) } catch { case NonFatal(_) => "" }

    obj.put( "xcoord",field("xcoord") )
    obj.put( "ycoord",field("ycoord") )
    obj.put( "precision",field("precision") )

    obj
  }

  /**
   * Geocodes an address with up to three map providers, in order gd -> bd -> tc.
   *
   *  - gd is queried only when `gd_aoiid` is empty.
   *  - bd is queried when `gd_aoiid` is already known, or gd returned a coordinate
   *    with precision "2".
   *  - tc is queried only when bd returned a coordinate with precision "2".
   *
   * The coordinates are collected for AOI attachment only when both bd and tc
   * returned precision-"2" coordinates; the gd coordinate is added as well when
   * it was requested here.
   *
   * Each provider's precision is now checked on its own response. Previously the
   * bd checks read the gd response, so tc was never queried when `gd_aoiid` was
   * already known, and bd precision was never validated otherwise.
   *
   * @param gd_aoiid          gd AOI id already present on the record, if any
   * @param obj               record that receives the per-provider request/response
   * @param req_addresseeaddr address to geocode
   * @param req_destcitycode  city code sent to the providers
   * @return (provider results in order gd, bd, tc — empty objects for providers
   *          that were not queried; coordinates to attach; number of provider
   *          lookups performed)
   */
  def getTsCoor( gd_aoiid:String,obj:JSONObject,req_addresseeaddr:String,req_destcitycode: String ): (List[JSONObject], JSONArray, Int) = {
    // A usable result has both coordinates and precision "2".
    def isHighPrecision(res: JSONObject): Boolean =
      StringUtils.isNotEmpty(res.getString("xcoord")) &&
        StringUtils.isNotEmpty(res.getString("ycoord")) &&
        "2".equals(res.getString("precision"))

    // Converts a provider result into the {lat, lng} shape sent for AOI attachment.
    def toPoint(res: JSONObject): JSONObject = {
      val point = new JSONObject()
      point.put( "lat",res.getString("ycoord" ))
      point.put( "lng",res.getString("xcoord") )
      point
    }

    val gdAoiKnown = StringUtils.isNotEmpty( gd_aoiid )

    var gdResJson = new JSONObject()
    var bdResJson = new JSONObject()
    var tcResJson = new JSONObject()
    var cnt = 0

    if ( !gdAoiKnown ) {
      gdResJson = getDataFromUrlRetry3(obj,"gd",tsUrl,"gd2",req_addresseeaddr,req_destcitycode,true)
      cnt += 1
    }

    if ( gdAoiKnown || isHighPrecision(gdResJson) ) {
      bdResJson = getDataFromUrlRetry3(obj,"bd",tsUrl,"bd2",req_addresseeaddr,req_destcitycode,true)
      cnt += 1

      if ( isHighPrecision(bdResJson) ) {
        tcResJson = getDataFromUrlRetry3(obj,"tc",tsUrl,"tc2",req_addresseeaddr,req_destcitycode,true)
        cnt += 1
      }
    }

    val trackArr = new JSONArray()

    // Collect the coordinates whose AOI has to be resolved.
    if ( isHighPrecision(bdResJson) && isHighPrecision(tcResJson) ) {
      trackArr.add(toPoint(bdResJson))
      trackArr.add(toPoint(tcResJson))

      if ( !gdAoiKnown )
        trackArr.add(toPoint(gdResJson))
    }

    (List(gdResJson,bdResJson,tcResJson),trackArr,cnt)
  }


  /**
   * Resolves the AOI of each coordinate in `trackArr` through the AOI attachment
   * endpoint and writes `<prefix>_aoiid`, `<prefix>_aoicode` and `<prefix>_aoiname`
   * on the record for every provider result whose coordinate was matched.
   *
   * The request is attempted up to 3 times until a response containing
   * `"code":200` is received. The request payload and the accepted response (or an
   * empty string) are stored on the record as `coor_aoi_req` / `coor_aoi_resp`.
   *
   * @param trackArr    coordinates to resolve; nothing is requested when empty
   * @param resJsonList provider results carrying `prefix`, `xcoord` and `ycoord`
   * @param obj         record to annotate
   * @param gd_aoiid    not used by this method
   */
  def getAoiByCoor( trackArr:JSONArray, resJsonList:List[JSONObject], obj:JSONObject, gd_aoiid:String ): Unit = {
    import scala.util.control.NonFatal

    var rsp = ""

    if ( trackArr != null && trackArr.size > 0 ) {
      var attempt = 0
      while ( attempt < 3 && StringUtils.isEmpty(rsp) ) {
        attempt += 1

        // A null response is handled like any other rejected response.
        rsp = Option(doPost(trackArr)).getOrElse("")
        if( ! rsp.contains("\"code\":200") ) {
          logger.error(s"接口请求异常$rsp,正在重试第${attempt}次")
          rsp = ""
        }
      }
    }

    // (lat, lng) -> (aoi id, aoi code, normalized aoi name)
    var trackMap = Map[(String,String),(String,String,String)]()

    if ( StringUtils.isNotEmpty(rsp) && rsp.contains("\"code\":200") ) {
      val respJsonArr =
        try { JSON.parseObject(rsp).getJSONArray("data") }
        catch { case NonFatal(_) => null }

      // `data` may be missing or unparsable; previously this led to a
      // NullPointerException on the unconditional clear().
      if( respJsonArr != null ) {
        for ( i <- 0 until respJsonArr.size() ) {
          val aoiElem = respJsonArr.getJSONObject(i)
          val aoi_id = aoiElem.getString("aoiId")
          val aoi_code = aoiElem.getString("aoiCode")
          val aoi_name = transAddr(aoiElem.getString("aoiName"))
          val coors = aoiElem.getJSONArray("coors")
          if( coors != null )
            for ( j <- 0 until coors.size() ) {
              val coor = coors.getJSONObject(j)
              trackMap += (coor.getString("lat"),coor.getString("lng")) -> ( aoi_id,aoi_code,aoi_name )
            }
        }

        respJsonArr.clear()
      }
    } else
      logger.error(s"${getTrackAoisUrl}返回结果异常:$rsp => "+trackArr)

    obj.put("coor_aoi_req",trackArr)
    obj.put("coor_aoi_resp",rsp)

    logger.error("trackMap.size:" + trackMap.size)

    if ( trackMap.nonEmpty ) {
      for ( jsonObj <- resJsonList ) {
        val prefix = jsonObj.getString("prefix")
        val key = (jsonObj.getString("ycoord"), jsonObj.getString("xcoord"))

        trackMap.get(key).foreach { case (aoiId, aoiCode, aoiName) =>
          obj.put( prefix+"_aoiid", aoiId )
          obj.put( prefix+"_aoicode", aoiCode )
          obj.put( prefix+"_aoiname", aoiName )
        }
      }
    }
  }


  /**
   * Posts the coordinate array to the AOI attachment endpoint and logs the
   * elapsed time, the response and the payload.
   *
   * @param trackArr coordinates to send
   * @return the response returned by SparkUtils.doPostArr
   */
  def doPost(trackArr: JSONArray) = {
    val startedAt = System.currentTimeMillis()
    val rsp = SparkUtils.doPostArr(getTrackAoisUrl, trackArr, logger)
    val elapsed = System.currentTimeMillis() - startedAt

    // Calls slower than 5 seconds are reported as timeouts.
    if (elapsed > 5000) {
      logger.error("请求超时 => " + trackArr)
    }

    logger.error(s"cost:${elapsed} => " + rsp)
    logger.error("trackArr => " + trackArr)

    rsp
  }





  /**
   * Reads (aoi_code, aoi_id) pairs from dm_gis.cms_aoi_sch and broadcasts them
   * as a map.
   *
   * The query is restricted with an `in (...)` filter only when the list holds
   * between 1 and 99999 codes; otherwise the whole table is read.
   *
   * @param spark       active Spark session
   * @param aoicodeList AOI codes to look up
   * @return broadcast map aoi_code -> aoi_id
   */
  def getAoicodeIdMap( spark:SparkSession,aoicodeList: scala.List[String]) ={
    logger.error("开始获取aoicode和id的映射")

    val baseSql =
      s"""
         |select
         |  aoi_code,aoi_id
         |from dm_gis.cms_aoi_sch
         |""".stripMargin

    val useFilter = aoicodeList.nonEmpty && aoicodeList.size < 100000

    val querySql =
      if (useFilter)
        s"""
           |$baseSql
           |where aoi_code in ('${aoicodeList.mkString("','")}')
           |""".stripMargin
      else
        baseSql

    logger.error(querySql)

    val aoicodeIdMap = spark.sql(querySql).rdd
      .map(row => row.getString(0) -> row.getString(1))
      .collect()
      .toMap

    logger.error(s"共获取aoi code和id映射数据共:${aoicodeIdMap.size}")

    spark.sparkContext.broadcast(aoicodeIdMap)
  }


  /**
   * Builds the review request for every record tagged `ok_chkn` or `ts_chkn`
   * and converts all records to rows in output-column order.
   *
   * The AOI id placed in the request is `r_aoi_id` for `ok_chkn` and `gd_aoiid`
   * for `ts_chkn`; records without that id get no request. The request is only
   * recorded (`review_req`), it is not sent; `review_resp` is set to "".
   *
   * @param reviewRdd tagged records
   * @return one row per record, with all columns read as strings
   */
  def doReview(reviewRdd:RDD[JSONObject] ) ={
    logger.error("开始进行压审补")

    // Output columns, in the order expected by the target table.
    val outputColumns = Seq(
      "waybillno", "city_code", "org_code", "userid", "operatime_new",
      "delivery_lgt", "delivery_lat",
      "req_waybillno", "req_destcitycode", "req_addresseeaddr", "req_comp_name",
      "finalzc", "finalaoicode", "gisaoisrc", "tag1", "tag2",
      "r_aoi", "r_aoi_id", "r_aoi_area",
      "key_word", "key_tag",
      "mapa_aoiid", "mapa_aoicode", "mapa_aoiname",
      "gd_coor_req", "gd_coor_resp", "gd_aoiid", "gd_aoicode", "gd_aoiname",
      "gis_aoi_code", "gis_aoi_name", "80_aoi_code", "80_aoi_name", "gis_aoi_area",
      "addr_r_aoi_freq", "tag", "step",
      "bd_coor_req", "bd_coor_resp", "bd_aoiid", "bd_aoicode", "bd_aoiname",
      "tc_coor_req", "tc_coor_resp", "tc_aoiid", "tc_aoicode", "tc_aoiname",
      "coor_aoi_req", "coor_aoi_resp",
      "review_req", "review_resp",
      "inc_day"
    )

    val doReviewRdd = reviewRdd.mapPartitionsWithIndex { (_, records) =>
      records.map { record =>
        val cityCode = record.getString("city_code")
        val address = record.getString("req_addresseeaddr")
        val znoCode = record.getString("finalzc")

        // Which field holds the AOI id to submit depends on the tag.
        val aoiIdField = record.getString("tag") match {
          case "ok_chkn" => Some("r_aoi_id")
          case "ts_chkn" => Some("gd_aoiid")
          case _         => None
        }

        aoiIdField
          .map(field => record.getString(field))
          .filter(aoiId => StringUtils.isNotEmpty(aoiId))
          .foreach { aoiId =>
            record.put("review_req", String.format(reviewUrl, cityCode, address, znoCode, aoiId))
            record.put("review_resp", "")
          }

        record
      }
    }.persist(StorageLevel.DISK_ONLY)

    val reviewedCount = doReviewRdd
      .filter(record => Array("ok_chkn","ts_chkn").contains(record.getString("tag")))
      .count()
    logger.error(s"共进行审补数据:${reviewedCount}")

    doReviewRdd.map(record => Row.fromSeq(outputColumns.map(column => record.getString(column))))
  }


  /**
   * Fills the AOI id fields of every record from the code -> id lookup.
   *
   * `r_aoi_id` is always set from `r_aoi` ("" when the code is unknown);
   * `finalaoiid` is set from `finalaoicode` only when it is currently empty.
   * Records are mutated in place.
   *
   * @param aoicodeIdMapBroadCast broadcast map aoi_code -> aoi_id
   * @param aoiInfoRdd            records to enrich
   * @return the same records with the id fields filled
   */
  def processAoicode( aoicodeIdMapBroadCast:Broadcast[Map[String, String]], aoiInfoRdd: RDD[JSONObject])={
    logger.error("开始处理r_aoi code")

    aoiInfoRdd.mapPartitions { records =>
      val codeToId = aoicodeIdMapBroadCast.value

      records.map { record =>
        record.put("r_aoi_id", codeToId.getOrElse(record.getString("r_aoi"), ""))

        val missingFinalId = StringUtils.isEmpty(record.getString("finalaoiid"))
        if (missingFinalId) {
          record.put("finalaoiid", codeToId.getOrElse(record.getString("finalaoicode"), ""))
        }

        record
      }
    }
  }


  /**
    * Replaces Chinese numerals below one thousand with Arabic digits, leaving all
    * other text untouched.
    *
    * Rules are applied from the longest numeral form to the shortest so that a
    * longer form is never split by a shorter rule. Compared with the previous
    * version, two forms are now converted correctly:
    *  - "X百零Y" (e.g. 一百零五 -> 105; it used to yield 10005)
    *  - a leading 十 (e.g. 十二 -> 12, 十 -> 10; 十 used to be left unconverted)
    *
    * @param orginal text to convert; null is returned unchanged
    * @return the text with Chinese numerals replaced by digits
    */
  def chToNum( orginal:String ): String ={
    import scala.util.matching.Regex

    val digit = "(一|二|三|四|五|六|七|八|九)"
    val mapping=Map("一"->1,"二"->2,"三"->3,"四"->4,"五"->5,"六"->6,"七"->7,"八"->8,"九"->9,"零"->0)

    // Numeric value of one captured numeral.
    def value(m: Regex.Match, group: Int): Int = mapping(m.group(group))

    // Pairs a pattern with the function computing the number it denotes.
    def rule(pattern: String)(toNumber: Regex.Match => Int): (Regex, Regex.Match => Int) =
      (pattern.r, toNumber)

    val rules = Seq(
      rule(s"${digit}百${digit}十${digit}")(m => value(m, 1) * 100 + value(m, 2) * 10 + value(m, 3)),
      rule(s"${digit}百零${digit}")(m => value(m, 1) * 100 + value(m, 2)),
      rule(s"${digit}百${digit}十?")(m => value(m, 1) * 100 + value(m, 2) * 10),
      rule(s"${digit}百")(m => value(m, 1) * 100),
      rule(s"${digit}十${digit}")(m => value(m, 1) * 10 + value(m, 2)),
      rule(s"${digit}十")(m => value(m, 1) * 10),
      rule(s"十${digit}")(m => 10 + value(m, 1)),
      rule("十")(_ => 10),
      rule("(一|二|三|四|五|六|七|八|九|零)")(m => value(m, 1))
    )

    if (orginal == null) orginal
    else rules.foldLeft(orginal) { case (text, (regex, toNumber)) =>
      regex.replaceAllIn(text, m => toNumber(m).toString)
    }
  }


  /**
   * Normalizes an AOI name: upper-cases it, converts Chinese numerals to digits
   * and rewrites the building suffixes 坐 / 座 / 栋 / 号楼 to 栋.
   *
   * @param addr name to normalize
   * @return the normalized name; null or empty input is returned unchanged
   */
  def transAddr( addr:String ) = {
    if (StringUtils.isEmpty(addr)) {
      addr
    } else {
      val withDigits = chToNum(addr.toUpperCase)
      withDigits.replaceAll("(坐)|(座)|(栋)|(号楼)", "栋")
    }
  }


}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值