package com.fengtu.sparktest.aoiTest

import java.net.URLEncoder

import com.alibaba.fastjson.serializer.SerializerFeature
import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import com.fengtu.sparktest.utils.SparkUtils
import com.fengtu.sparktest.utils2.Utils
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.util.control.NonFatal

/**
 * AOI real accuracy rate: decides, for waybills that the previous workflow step
 * tagged as `wrong`, whether they should be pushed to the review/supplement
 * ("审补") service, and writes the decision trail to Hive.
 *
 * Pipeline (see [[run]]):
 *  1. load the `wrong` rows of `dm_gis.aoi_real_acc_rate_9` for one `inc_day`;
 *  2. resolve AOI codes to AOI ids and AOI ids to AOI areas;
 *  3. tag every row (`tag` / `step`), geocoding the address with the three map
 *     vendors (gd / bd / tc) when the AOI areas disagree;
 *  4. build the review request for rows tagged `ok_chkn` / `ts_chkn`;
 *  5. append the result to `dm_gis.aoi_real_acctury_rate_judge_wrong_2_review`.
 */
object AoiRealAccturyRateJudgeWrong2Review {

  val appName: String = this.getClass.getSimpleName.replace("$", "")
  val logger: Logger = Logger.getLogger(appName)

  // Disable fastjson circular-reference detection so that a JSONObject that is
  // stored in several places is serialised in full instead of as a "$ref".
  JSON.DEFAULT_GENERATE_FEATURE |= SerializerFeature.DisableCircularReferenceDetect.getMask

  /** Partition count handed to `SparkUtils.getRowToJson`. */
  val calPartition = 2000

  /** Map-vendor geocoding endpoint; format arguments are (opt, url-encoded address, city). */
  val tsUrl = "http://gis-int.int.sfdc.com.cn:1080/geo/api?ak=3a191e7427e8470c86271a069411c66b&opt=%s&address=%s&city=%s"

  /** Endpoint that attaches AOIs to a list of coordinates. Kept as a `var` for compatibility. */
  var getTrackAoisUrl = "http://10.119.72.206:9000/data/aoiSdk/getTrackAois?showAoiCode=1".stripMargin

  /** Review/supplement endpoint; format arguments are (cityCode, address, znoCode, aoiId). */
  val reviewUrl = "http://gis-cms-bg.sf-express.com/cms/api/address/rgsbAdd?cityCode=%s&address=%s&znoCode=%s&aoiId=%s&operSource=真实AOI准确率"

  /** Maximum number of geocoding requests a partition may issue per minute (1000). */
  val limitMin = 60000 / 60

  /** Above this many values (or with none) the lookup queries are run without an IN filter. */
  private val maxInClauseSize = 100000

  /** `gisaoisrc` values that always block the automatic review. */
  private val trustedAoiSources = Set("normcompany", "normhp")

  /** Address endings (village / industrial zone / industrial park ...) that block the automatic review. */
  private val areaSuffixRegex = "(.*村$)|(.*工业区$)|(.*工业园$)|(.*工业园区$)|(.*产业园$)"

  /**
   * Output columns, in table order. Single source of truth for both [[schema]]
   * and the rows built in [[doReview]], so the two can never drift apart.
   */
  private val outputColumns: List[String] = List(
    "waybillno", "city_code", "org_code", "userid", "operatime_new",
    "delivery_lgt", "delivery_lat", "req_waybillno", "req_destcitycode",
    "req_addresseeaddr", "req_comp_name", "finalzc", "finalaoicode", "gisaoisrc",
    "tag1", "tag2", "r_aoi", "r_aoi_id", "r_aoi_area", "key_word", "key_tag",
    "mapa_aoiid", "mapa_aoicode", "mapa_aoiname",
    "gd_coor_req", "gd_coor_resp", "gd_aoiid", "gd_aoicode", "gd_aoiname",
    "gis_aoi_code", "gis_aoi_name", "80_aoi_code", "80_aoi_name",
    "gis_aoi_area", "addr_r_aoi_freq", "tag", "step",
    "bd_coor_req", "bd_coor_resp", "bd_aoiid", "bd_aoicode", "bd_aoiname",
    "tc_coor_req", "tc_coor_resp", "tc_aoiid", "tc_aoicode", "tc_aoiname",
    "coor_aoi_req", "coor_aoi_resp", "review_req", "review_resp", "inc_day"
  )

  /** Hive schema of the output table: every column is a nullable string. */
  val schema = StructType(outputColumns.map(name => StructField(name, StringType, nullable = true)))

  /**
   * Entry point.
   *
   * @param args `args(0)` is the `inc_day` partition to process
   */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: AoiRealAccturyRateJudgeWrong2Review <inc_day>")
    // NOTE(review): the application is registered as "SparkDecode" rather than
    // `appName`; kept as-is because monitoring may key on the existing name.
    val spark = SparkSession.builder()
      .appName("SparkDecode")
      .master("yarn")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition", true)
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    val incDay = args(0)
    run(spark, incDay)
  }

  /**
   * Runs the whole pipeline for one day and appends the result to Hive.
   *
   * @param spark  active session
   * @param incDay `inc_day` partition to read and to write
   */
  def run(spark: SparkSession, incDay: String): Unit = {
    // Load the rows produced by the last workflow step
    val (aoiInfoRdd, aoicodeList, addrRaoifrequencyMapBroadCast) = getLastFlowData(spark, incDay)
    // AOI code -> AOI id
    val aoicodeIdMapBroadCast = getAoicodeIdMap(spark, aoicodeList)
    // Attach r_aoi_id / finalaoiid to every row
    val aoiInfoTmpRdd = processAoicode(aoicodeIdMapBroadCast, aoiInfoRdd)
    logger.error("aoiInfoTmpRdd的数据总量为:" + aoiInfoTmpRdd.count())
    aoiInfoTmpRdd.take(2).foreach(println(_))
    // AOI id -> AOI area.
    // NOTE(review): this passes AOI *codes*, while getAoiAreaMap filters on `aoi_id`
    // and the resulting map is looked up by AOI id — confirm which key the table uses.
    val aoiAreaMapBroadCast = getAoiAreaMap(spark, aoicodeList)
    // Tag every row; the result is persisted and materialised inside processReviewData
    val reviewRdd = processReviewData(aoiInfoTmpRdd, aoiAreaMapBroadCast, addrRaoifrequencyMapBroadCast)
    // Release executor copies; Spark re-sends a broadcast if a lost partition needs it again
    aoicodeIdMapBroadCast.unpersist()
    addrRaoifrequencyMapBroadCast.unpersist()
    aoiAreaMapBroadCast.unpersist()
    // Build the review requests
    val saveRdd = doReview(reviewRdd)
    // Store
    SparkUtils.df2Hive(spark, saveRdd, schema, "append",
      "dm_gis.aoi_real_acctury_rate_judge_wrong_2_review", "inc_day", incDay, logger)
  }

  /**
   * Loads the rows tagged `wrong` for `incDay` and derives two lookup inputs from them.
   *
   * @param spark  active session
   * @param incDay `inc_day` partition to read
   * @return (rows as JSON objects,
   *         distinct non-empty AOI codes found in `r_aoi` / `finalaoicode`,
   *         broadcast of address -> number of distinct `r_aoi` values seen for it)
   */
  def getLastFlowData(spark: SparkSession, incDay: String): (RDD[JSONObject], List[String], Broadcast[Map[String, Int]]) = {
    logger.error("开始获取判错运营工艺数据")
    val querySql =
      s"""
         |select
         |  *
         |from
         |  dm_gis.aoi_real_acc_rate_9
         |where inc_day= '$incDay' and tag1='wrong'
         |""".stripMargin
    logger.error(querySql)
    val df = spark.sql(querySql).persist(StorageLevel.MEMORY_AND_DISK_SER)
    val sourRdd = SparkUtils.getRowToJson(df, calPartition)

    // Every AOI code whose id / area has to be resolved
    val aoicodeList = sourRdd
      .flatMap(obj => Array(obj.getString("r_aoi"), obj.getString("finalaoicode")))
      .filter(aoicode => StringUtils.isNotEmpty(aoicode))
      .distinct()
      .collect()
      .toList

    // Number of distinct r_aoi values per address. De-duplicating the pairs first and
    // summing ones yields the same count as groupByKey + distinct without holding all
    // values of one address in memory at once.
    val addrRaoifrequencyMap = sourRdd
      .map(obj => (obj.getString("req_addresseeaddr"), obj.getString("r_aoi")))
      .filter(pair => StringUtils.isNotEmpty(pair._1))
      .distinct()
      .mapValues(_ => 1)
      .reduceByKey(_ + _)
      .collect()
      .toMap

    (sourRdd, aoicodeList, spark.sparkContext.broadcast(addrRaoifrequencyMap))
  }

  /**
   * Appends `where <column> in ('v1','v2',...)` to `baseSql` when the value list is
   * non-empty and smaller than [[maxInClauseSize]]; otherwise returns `baseSql` unchanged.
   * Values are inlined verbatim, so they must not contain single quotes.
   */
  private def withInFilter(baseSql: String, column: String, values: List[String]): String =
    if (values.nonEmpty && values.size < maxInClauseSize) {
      s"""
         |$baseSql
         |where $column in ('${values.mkString("','")}')
         |""".stripMargin
    } else baseSql

  /**
   * Loads the AOI id -> AOI area code mapping from `dm_tc_waybillinfo.aoi_area_aoi`.
   *
   * @param spark       active session
   * @param aoicodeList values used to restrict `aoi_id` (unfiltered when empty or too large)
   * @return broadcast of aoi_id -> aoi_area_code
   */
  def getAoiAreaMap(spark: SparkSession, aoicodeList: List[String]): Broadcast[Map[String, String]] = {
    logger.error("开始获取aoiid对应的aoi区域编码")
    val baseSql =
      s"""
         |select
         |  aoi_id,
         |  aoi_area_code
         |from
         |dm_tc_waybillinfo.aoi_area_aoi
         |""".stripMargin
    val querySql = withInFilter(baseSql, "aoi_id", aoicodeList)
    logger.error(querySql)
    val aoiAreaMap = spark.sql(querySql).rdd
      .map(row => (row.getString(0), row.getString(1)))
      .collect()
      .toMap
    logger.error(s"共获取aoi区域:${aoiAreaMap.size}")
    spark.sparkContext.broadcast(aoiAreaMap)
  }

  /**
   * Drops the last six characters of an AOI code when the code is longer than eight
   * characters; shorter, empty or null codes are returned unchanged.
   * (Presumably the remaining prefix identifies the network site — confirm with the data owner.)
   */
  private def siteCodeOf(aoiCode: String): String =
    if (StringUtils.isNotEmpty(aoiCode) && aoiCode.length > 8) aoiCode.substring(0, aoiCode.length - 6)
    else aoiCode

  /** True when the address is non-empty and ends with one of the blocking area keywords. */
  private def hasAreaSuffix(address: String): Boolean =
    StringUtils.isNotEmpty(address) && address.matches(areaSuffixRegex)

  /**
   * Tags every row with the review decision.
   *
   * When the AOI areas of `r_aoi` and `finalaoi` are both known and equal, the decision
   * is taken locally (`no_chkn`, or `ok_chkn` when nothing blocks it). Otherwise the
   * address is geocoded with the map vendors and the resulting AOIs are compared
   * (`ts_chkn` when all three agree and nothing blocks it, `no_chkn` otherwise).
   * `step` records which rule produced a `no_chkn`.
   *
   * The returned RDD is persisted to disk and materialised before returning.
   *
   * @param aoiInfoRdd                    rows carrying `r_aoi_id` / `finalaoiid`
   * @param aoiAreaMapBroadCast           aoi id -> aoi area code
   * @param addrRaoifrequencyMapBroadCast address -> number of distinct `r_aoi`
   * @return the same JSON objects, enriched in place
   */
  def processReviewData(
      aoiInfoRdd: RDD[JSONObject],
      aoiAreaMapBroadCast: Broadcast[Map[String, String]],
      addrRaoifrequencyMapBroadCast: Broadcast[Map[String, Int]]): RDD[JSONObject] = {
    logger.error("开始处理要进行审补的数据")
    val reviewRdd = aoiInfoRdd.mapPartitionsWithIndex((index, iter) => {
      val aoiAreaMap = aoiAreaMapBroadCast.value
      val addrRaoifrequencyMap = addrRaoifrequencyMapBroadCast.value
      // Per-partition rate-limit window for the geocoding key
      var windowStart = System.currentTimeMillis()
      var cnt = 0
      for (obj <- iter) yield {
        val r_aoi = obj.getString("r_aoi")
        val r_aoi_id = obj.getString("r_aoi_id")
        val finalaoicode = obj.getString("finalaoicode")
        val finalaoiid = obj.getString("finalaoiid")
        val req_addresseeaddr = obj.getString("req_addresseeaddr")
        val gisaoisrc = obj.getString("gisaoisrc")
        val req_destcitycode = obj.getString("req_destcitycode")
        val gd_aoiid = obj.getString("gd_aoiid")

        // Each code is truncated using its OWN length. The previous version used
        // r_aoi's length for finalaoicode, which produced a wrong prefix (or threw)
        // whenever the two codes differed in length or r_aoi was null.
        val r_aoi_areacode = siteCodeOf(r_aoi)
        val finalaoicodeNew = siteCodeOf(finalaoicode)

        val r_aoi_area = aoiAreaMap.getOrElse(r_aoi_id, "")
        val gis_aoi_area = aoiAreaMap.getOrElse(finalaoiid, "")
        val addrRaoifrequency = addrRaoifrequencyMap.getOrElse(req_addresseeaddr, -1)
        obj.put("r_aoi_area", r_aoi_area)
        obj.put("gis_aoi_area", gis_aoi_area)
        obj.put("addr_r_aoi_freq", addrRaoifrequency)

        val blockedBySourceOrSuffix = trustedAoiSources.contains(gisaoisrc) || hasAreaSuffix(req_addresseeaddr)

        // Are the AOI areas of finalaoi and r_aoi the same?
        if (StringUtils.isNotEmpty(r_aoi_area) && StringUtils.isNotEmpty(gis_aoi_area) && r_aoi_area.equals(gis_aoi_area)) {
          // Are the site codes of finalaoi and r_aoi the same?
          if (StringUtils.isNotEmpty(finalaoicodeNew) && StringUtils.isNotEmpty(r_aoi_areacode) && r_aoi_areacode.equals(finalaoicodeNew)) {
            obj.put("tag", "no_chkn")
          } else if (blockedBySourceOrSuffix || addrRaoifrequency >= 2) {
            // Blocked: trusted source, address seen with >= 2 different r_aoi,
            // or address ending with a village / industrial-area keyword
            obj.put("tag", "no_chkn")
            obj.put("step", "1")
          } else {
            obj.put("tag", "ok_chkn")
          }
        } else {
          // Throttle the geocoding key. `cnt` grows by 0..3 per row, so the test must
          // be `>=`: with `==` the counter could step over the limit and never throttle.
          if (cnt >= limitMin) {
            val elapsed = System.currentTimeMillis() - windowStart
            if (elapsed <= 60 * 1000) {
              logger.error(s"分区$index,每分钟访问量超过限制$limitMin,休眠${60 * 1000 - elapsed} ms中")
              Thread.sleep(60 * 1000 - elapsed)
            }
            windowStart = System.currentTimeMillis()
            cnt = 0
          }
          // Geocode the address with the map vendors (gd / bd / tc)
          val (resJsonList, trackArr, cntTmp) = getTsCoor(gd_aoiid, obj, req_addresseeaddr, req_destcitycode)
          cnt += cntTmp
          // Attach AOIs to the returned coordinates
          getAoiByCoor(trackArr, resJsonList, obj, gd_aoiid)
          // gd_aoiid == bd_aoiid == tc_aoiid and gd_aoiid is not empty?
          val vendorsAgree = StringUtils.isNotEmpty(obj.getString("gd_aoiid")) &&
            obj.getString("gd_aoiid").equals(obj.getString("bd_aoiid")) &&
            obj.getString("bd_aoiid").equals(obj.getString("tc_aoiid"))
          if (!vendorsAgree) {
            obj.put("tag", "no_chkn")
            obj.put("step", "2")
          } else if (blockedBySourceOrSuffix) {
            obj.put("tag", "no_chkn")
            obj.put("step", "3")
          } else {
            obj.put("tag", "ts_chkn")
          }
        }
        obj
      }
    }).persist(StorageLevel.DISK_ONLY)
    logger.error(s"共处理数据:${reviewRdd.count()}")
    reviewRdd
  }

  /**
   * Calls the geocoding endpoint and stores request and parsed response on `obj`
   * under `<typeStr>_coor_req` / `<typeStr>_coor_resp`.
   *
   * A response is accepted when it contains `"status":0` or `"err":-105`; anything
   * else (including an exception) is logged and retried. Despite the method name, up
   * to six attempts are made. If every attempt fails the coordinates are left empty.
   *
   * @param obj     row to annotate
   * @param typeStr vendor prefix (`gd`, `bd`, `tc`)
   * @param url     format string with three `%s`: opt, encoded address, city
   * @param opt     vendor option passed to the endpoint
   * @param address raw address; null is treated as empty
   * @param city    city code
   * @param isSleep when true, wait 100 ms between failed attempts
   * @return JSON with `xcoord`, `ycoord`, `precision` and `prefix`
   */
  def getDataFromUrlRetry3(obj: JSONObject, typeStr: String, url: String, opt: String, address: String, city: String, isSleep: Boolean = false): JSONObject = {
    // URLEncoder.encode throws on null, so a missing address is encoded as empty
    val encodedAddress = URLEncoder.encode(Option(address).getOrElse(""), "utf-8")
    val req = String.format(url, opt, encodedAddress, city)
    var icnt = 0
    var resp = ""
    while (icnt <= 5 && StringUtils.isEmpty(resp)) {
      // Both the response body and an exception message may be null; treat as empty
      val raw = try Utils.retryGet(req) catch { case e: Exception => e.getMessage() }
      resp = Option(raw).getOrElse("")
      icnt += 1
      if (!resp.contains("\"status\":0") && !resp.contains("\"err\":-105")) {
        logger.error(s"${req}接口请求异常:\n$resp,正在重试第${icnt}次")
        resp = ""
        if (isSleep) Thread.sleep(100)
      }
    }
    val resJson = new JSONObject()
    parseCoor(resp, resJson)
    obj.put(s"${typeStr}_coor_req", req)
    obj.put(s"${typeStr}_coor_resp", resJson)
    resJson.put("prefix", typeStr)
    resJson
  }

  /**
   * Copies `result.xcoord`, `result.ycoord` and `result.precision` from a geocoding
   * response into `obj`. When the response cannot be parsed or has no `result`
   * object, all three are set to the empty string.
   *
   * @param respJsonStr raw response body
   * @param obj         target JSON object (mutated)
   * @return `obj`
   */
  def parseCoor(respJsonStr: String, obj: JSONObject): JSONObject = {
    // Parse once; only non-fatal failures are turned into "no result"
    val result: Option[JSONObject] =
      try Option(JSON.parseObject(respJsonStr)).flatMap(json => Option(json.getJSONObject("result")))
      catch { case NonFatal(_) => None }

    def field(name: String): String = result match {
      case Some(res) => res.getString(name) // may be null when the key is absent
      case None => ""
    }

    obj.put("xcoord", field("xcoord"))
    obj.put("ycoord", field("ycoord"))
    obj.put("precision", field("precision"))
    obj
  }

  /** True when the geocoding result has both coordinates and `precision` equal to "2". */
  private def isHighPrecision(coor: JSONObject): Boolean =
    StringUtils.isNotEmpty(coor.getString("xcoord")) &&
      StringUtils.isNotEmpty(coor.getString("ycoord")) &&
      "2".equals(coor.getString("precision"))

  /** Builds the `{lat, lng}` object expected by the AOI attachment service. */
  private def toLatLng(coor: JSONObject): JSONObject = {
    val point = new JSONObject()
    point.put("lat", coor.getString("ycoord"))
    point.put("lng", coor.getString("xcoord"))
    point
  }

  /**
   * Geocodes the address with the map vendors, stopping as soon as one vendor fails
   * to return a high-precision coordinate:
   * gd (skipped when `gd_aoiid` is already known) -> bd -> tc.
   *
   * @param gd_aoiid          gd AOI id already present on the row, if any
   * @param obj               row to annotate with requests / responses
   * @param req_addresseeaddr address to geocode
   * @param req_destcitycode  destination city code
   * @return (results in the order gd, bd, tc — an empty JSON object for a vendor that
   *         was not called; coordinates to attach AOIs to, empty unless bd and tc are
   *         both high precision; number of geocoding requests issued)
   */
  def getTsCoor(gd_aoiid: String, obj: JSONObject, req_addresseeaddr: String, req_destcitycode: String): (List[JSONObject], JSONArray, Int) = {
    val gdAoiKnown = StringUtils.isNotEmpty(gd_aoiid)
    var cnt = 0

    def fetch(typeStr: String, opt: String): JSONObject = {
      cnt += 1
      getDataFromUrlRetry3(obj, typeStr, tsUrl, opt, req_addresseeaddr, req_destcitycode, isSleep = true)
    }

    val gdResJson = if (gdAoiKnown) new JSONObject() else fetch("gd", "gd2")
    val bdResJson = if (gdAoiKnown || isHighPrecision(gdResJson)) fetch("bd", "bd2") else new JSONObject()
    // The bd check now reads bd's own precision. It used to read gd's, so tc was never
    // queried when gd_aoiid was already known and bd's precision was never verified.
    val tcResJson = if (isHighPrecision(bdResJson)) fetch("tc", "tc2") else new JSONObject()

    // Coordinates whose AOI has to be resolved, in the order bd, tc, gd
    val trackArr = new JSONArray()
    if (isHighPrecision(bdResJson) && isHighPrecision(tcResJson)) {
      trackArr.add(toLatLng(bdResJson))
      trackArr.add(toLatLng(tcResJson))
      if (!gdAoiKnown) trackArr.add(toLatLng(gdResJson))
    }
    (List(gdResJson, bdResJson, tcResJson), trackArr, cnt)
  }

  /**
   * Parses the AOI attachment response into (lat, lng) -> (aoiId, aoiCode, aoiName).
   * Returns an empty map when the body cannot be parsed or has no `data` array.
   */
  private def parseTrackAois(rsp: String): Map[(String, String), (String, String, String)] = {
    val data: Option[JSONArray] =
      try Option(JSON.parseObject(rsp).getJSONArray("data"))
      catch { case NonFatal(_) => None }

    val entries = for {
      arr <- data.toSeq
      i <- 0 until arr.size()
      aoiElem <- Option(arr.getJSONObject(i)).toSeq
      coors <- Option(aoiElem.getJSONArray("coors")).toSeq
      j <- 0 until coors.size()
    } yield {
      val point = coors.getJSONObject(j)
      (point.getString("lat"), point.getString("lng")) ->
        (aoiElem.getString("aoiId"), aoiElem.getString("aoiCode"), transAddr(aoiElem.getString("aoiName")))
    }
    entries.toMap
  }

  /**
   * Resolves the AOI of every coordinate in `trackArr` (up to three attempts) and
   * writes `<prefix>_aoiid`, `<prefix>_aoicode`, `<prefix>_aoiname` on `obj` for each
   * vendor result whose coordinate was matched. The request and the raw response are
   * stored under `coor_aoi_req` / `coor_aoi_resp`.
   *
   * @param trackArr    coordinates to resolve; nothing is requested when empty
   * @param resJsonList vendor results carrying `prefix`, `xcoord`, `ycoord`
   * @param obj         row to annotate
   * @param gd_aoiid    unused, kept for signature compatibility
   */
  def getAoiByCoor(trackArr: JSONArray, resJsonList: List[JSONObject], obj: JSONObject, gd_aoiid: String): Unit = {
    var rsp = ""
    if (trackArr != null && trackArr.size > 0) {
      var cnt = 0
      while (cnt < 3 && StringUtils.isEmpty(rsp)) {
        cnt += 1
        // A null body is treated like any other failed attempt
        rsp = Option(doPost(trackArr)).getOrElse("")
        if (!rsp.contains("\"code\":200")) {
          logger.error(s"接口请求异常$rsp,正在重试第${cnt}次")
          rsp = ""
        }
      }
    }

    // Which AOI each coordinate falls into
    val trackMap =
      if (StringUtils.isNotEmpty(rsp) && rsp.contains("\"code\":200")) {
        parseTrackAois(rsp)
      } else {
        logger.error(s"${getTrackAoisUrl}返回结果异常:$rsp => " + trackArr)
        Map.empty[(String, String), (String, String, String)]
      }

    obj.put("coor_aoi_req", trackArr)
    obj.put("coor_aoi_resp", rsp)
    logger.error("trackMap.size:" + trackMap.size)

    for (jsonObj <- resJsonList) {
      val prefix = jsonObj.getString("prefix")
      val key = (jsonObj.getString("ycoord"), jsonObj.getString("xcoord"))
      trackMap.get(key).foreach { case (aoiId, aoiCode, aoiName) =>
        obj.put(prefix + "_aoiid", aoiId)
        obj.put(prefix + "_aoicode", aoiCode)
        obj.put(prefix + "_aoiname", aoiName)
      }
    }
  }

  /**
   * Posts the coordinates to the AOI attachment service and logs timing, response
   * and request.
   *
   * @param trackArr coordinates to resolve
   * @return raw response body as returned by `SparkUtils.doPostArr`
   */
  def doPost(trackArr: JSONArray): String = {
    val start = System.currentTimeMillis()
    val rsp = SparkUtils.doPostArr(getTrackAoisUrl, trackArr, logger)
    val end = System.currentTimeMillis()
    if (end - start > 5000) logger.error("请求超时 => " + trackArr)
    logger.error(s"cost:${end - start} => " + rsp)
    logger.error("trackArr => " + trackArr)
    rsp
  }

  /**
   * Loads the AOI code -> AOI id mapping from `dm_gis.cms_aoi_sch`.
   *
   * @param spark       active session
   * @param aoicodeList codes used to restrict `aoi_code` (unfiltered when empty or too large)
   * @return broadcast of aoi_code -> aoi_id
   */
  def getAoicodeIdMap(spark: SparkSession, aoicodeList: scala.List[String]): Broadcast[Map[String, String]] = {
    logger.error("开始获取aoicode和id的映射")
    val baseSql =
      s"""
         |select
         |  aoi_code,aoi_id
         |from dm_gis.cms_aoi_sch
         |""".stripMargin
    val querySql = withInFilter(baseSql, "aoi_code", aoicodeList)
    logger.error(querySql)
    val aoicodeIdMap = spark.sql(querySql).rdd
      .map(row => (row.getString(0), row.getString(1)))
      .collect()
      .toMap
    logger.error(s"共获取aoi code和id映射数据共:${aoicodeIdMap.size}")
    spark.sparkContext.broadcast(aoicodeIdMap)
  }

  /**
   * Builds the review request for rows tagged `ok_chkn` (using `r_aoi_id`) or
   * `ts_chkn` (using `gd_aoiid`) and converts every row to a [[Row]] matching
   * [[schema]]. The HTTP call itself is currently disabled, so `review_resp` is
   * always the empty string.
   *
   * @param reviewRdd tagged rows
   * @return rows ready to be written to Hive
   */
  def doReview(reviewRdd: RDD[JSONObject]): RDD[Row] = {
    logger.error("开始进行压审补")
    val doReviewRdd = reviewRdd.mapPartitions(iter => {
      for (obj <- iter) yield {
        val cityCode = obj.getString("city_code")
        val address = obj.getString("req_addresseeaddr")
        val znoCode = obj.getString("finalzc")
        // The AOI id to submit depends on which rule approved the row
        val aoiIdOpt = obj.getString("tag") match {
          case "ok_chkn" => Option(obj.getString("r_aoi_id"))
          case "ts_chkn" => Option(obj.getString("gd_aoiid"))
          case _ => None
        }
        aoiIdOpt.filter(id => StringUtils.isNotEmpty(id)).foreach { aoiId =>
          // NOTE(review): the address is not URL-encoded here; encode it before
          // re-enabling the call below.
          val reviewReq = String.format(reviewUrl, cityCode, address, znoCode, aoiId)
          // val reviewRes = Utils.retryGet(reviewReq)
          obj.put("review_req", reviewReq)
          obj.put("review_resp", "")
        }
        obj
      }
    }).persist(StorageLevel.DISK_ONLY)
    logger.error(s"共进行审补数据:${doReviewRdd.filter(obj => Array("ok_chkn", "ts_chkn").contains(obj.getString("tag"))).count()}")
    doReviewRdd.map(obj => Row.fromSeq(outputColumns.map(column => obj.getString(column))))
  }

  /**
   * Attaches `r_aoi_id` (looked up from `r_aoi`) to every row and fills `finalaoiid`
   * from `finalaoicode` when the row does not already carry one. Unknown codes map
   * to the empty string.
   *
   * @param aoicodeIdMapBroadCast aoi_code -> aoi_id
   * @param aoiInfoRdd            source rows
   * @return the same JSON objects, enriched in place
   */
  def processAoicode(aoicodeIdMapBroadCast: Broadcast[Map[String, String]], aoiInfoRdd: RDD[JSONObject]): RDD[JSONObject] = {
    logger.error("开始处理r_aoi code")
    aoiInfoRdd.mapPartitions(iter => {
      val aoicodeIdMap = aoicodeIdMapBroadCast.value
      for (obj <- iter) yield {
        obj.put("r_aoi_id", aoicodeIdMap.getOrElse(obj.getString("r_aoi"), ""))
        if (StringUtils.isEmpty(obj.getString("finalaoiid"))) {
          obj.put("finalaoiid", aoicodeIdMap.getOrElse(obj.getString("finalaoicode"), ""))
        }
        obj
      }
    })
  }

  /**
   * Replaces Chinese numerals with Arabic digits, longest pattern first
   * (e.g. "三百二十一" -> "321", "二十" -> "20", "五" -> "5").
   *
   * Known limitations, kept for compatibility with existing output: a bare "十"
   * (as in "十二") is not converted, and "零" is only handled as a single digit, so
   * "一百零五" becomes "10005".
   *
   * @param orginal text to convert; must not be null
   * @return converted text
   */
  def chToNum(orginal: String): String = {
    val digit = "(一|二|三|四|五|六|七|八|九)"
    val mapping = Map("一" -> 1, "二" -> 2, "三" -> 3, "四" -> 4, "五" -> 5, "六" -> 6, "七" -> 7, "八" -> 8, "九" -> 9, "零" -> 0)

    def value(m: scala.util.matching.Regex.Match, group: Int): Int = mapping(m.group(group))

    // Order matters: longer patterns must be consumed before their prefixes
    val rules: List[(scala.util.matching.Regex, scala.util.matching.Regex.Match => Int)] = List(
      (s"${digit}百${digit}十${digit}".r, m => value(m, 1) * 100 + value(m, 2) * 10 + value(m, 3)),
      (s"${digit}百${digit}十?".r, m => value(m, 1) * 100 + value(m, 2) * 10),
      (s"${digit}百".r, m => value(m, 1) * 100),
      (s"${digit}十${digit}".r, m => value(m, 1) * 10 + value(m, 2)),
      (s"${digit}十".r, m => value(m, 1) * 10),
      ("""(一|二|三|四|五|六|七|八|九|零)""".r, m => value(m, 1))
    )

    rules.foldLeft(orginal) { case (text, (pattern, toNumber)) =>
      pattern.replaceAllIn(text, m => toNumber(m).toString)
    }
  }

  /**
   * Normalises an AOI name: upper-cases it, converts Chinese numerals to digits and
   * unifies the building suffixes 坐 / 座 / 栋 / 号楼 to 栋. Empty or null input is
   * returned unchanged.
   *
   * @param addr AOI name
   * @return normalised name
   */
  def transAddr(addr: String): String =
    if (StringUtils.isNotEmpty(addr)) chToNum(addr.toUpperCase).replaceAll("(坐)|(座)|(栋)|(号楼)", "栋")
    else addr
}
spark业务案例
最新推荐文章于 2024-04-02 16:46:02 发布