Spark writing to MySQL: a business case

The job below joins "no-AOI" waybill orders against routing data and a city-to-area mapping in Hive, writes the result into a Hive partition, and then pushes the same rows into two MySQL tables split by whether a team code was resolved.

package com.fengtu.sparktest.aoiTest

import java.security.MessageDigest
import java.sql.Connection
import java.util.Objects

import com.fengtu.sparktest.utils.{DbUtils, JdbcTemplateUtil, StringUtils}
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.storage.StorageLevel

import scala.util.Random

object AreaStaShouUnDistinguish {

  @transient lazy val logger: Logger = Logger.getLogger(AreaStaShouUnDistinguish.getClass)
  // Note: appName is computed here, but start() still passes the literal "SparkDecode" to the builder.
  val appName: String = this.getClass.getSimpleName.replace("$", "")

  def main(args: Array[String]): Unit = {
    //val inc_day = "20201107"
    //val str = GetGjAoiInterface.getContent("http://100.118.72.24:8088/adjusttrack","{\"inc_day\":\"20201107\",\"un\":\"40398702\",\"bn\":\"571FE\",\"tp\":\"20201107\",\"tracks\":[{\"ac\":\"12\",\"x\":\"120.488271\",\"y\":\"30.256971\",\"tm\":\"1604716191\",\"t_time\":\"2020/11/7 10:29\"},{\"ac\":\"13\",\"x\":\"120.488068\",\"y\":\"30.256066\",\"tm\":\"1604716311\",\"t_time\":\"2020/11/7 10:31\"},{\"ac\":\"30\",\"x\":\"120.493522\",\"y\":\"30.260036\",\"tm\":\"1604737005\",\"t_time\":\"2020/11/7 16:16\"},{\"ac\":\"7\",\"x\":\"120.493521\",\"y\":\"30.259342\",\"tm\":\"1604737041\",\"t_time\":\"2020/11/7 16:17\"},{\"ac\":\"12\",\"x\":\"120.492902\",\"y\":\"30.2581\",\"tm\":\"1604737084\",\"t_time\":\"2020/11/7 16:18\"}]}")
    val inc_day = args(0) // partition day to process, e.g. "20210201"
    //val inc_day = "20210201"
    start(inc_day)
  }

  def start(inc_day: String): Unit = {
    val spark = SparkSession
      .builder()
      .appName("SparkDecode")
      .master("yarn")
      .enableHiveSupport()
      // Dynamic partitioning lets insertInto write straight into the inc_day partition.
      .config("hive.exec.dynamic.partition", true)
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    startSta(spark, inc_day)
  }

  // One output row per order, written both to Hive and to MySQL.
  case class AreaStaShou(
    uid: String,
    area: String,
    region: String,
    city: String,
    citycode: String,
    deptcode: String,
    zonecode: String,
    barscantm: String,
    address: String,
    company: String,
    phone: String,
    mobile: String,
    baroprcode: String,
    src: String,
    finalzc: String,
    finaltc: String,
    group_id: String,
    syncreqdatetime: String,
    arssdeptcode: String,
    arssteamcode: String,
    arssaoicode: String,
    arssempcode: String,
    arssredatatime: String,
    inc_day: String
  )

  case class CityToCode(city_code: String, area_code: String, area_name: String)

  // The MySQL connection settings moved into area_db.properties; the old inline
  // constants are kept here for reference:
  //private val APPLICATION_DB: String = "area_db.properties"
  //private val DRIVER: String = "com.mysql.jdbc.Driver"
  //private val URL: String = "jdbc:mysql://gisaosdw-m.db.sfcloud.local:3306/aosdw?useSSL=false&useUnicode=true&characterEncoding=utf8mb4&autoReconnect=true&failOverReadOnly=false&useOldAliasMetadataBehavior=true"
  //private val USERNAME: String = "aosdw"
  //private val PASSWORD: String = "@aosdw123"

  private val DB_TABLE_NAME_No = "oms_no_aoi_no_tc"    // rows whose arssteamcode is empty
  private val DB_TABLE_NAME_Has = "oms_no_aoi_has_tc"  // rows whose arssteamcode is set
  val area_db_file = "area_db.properties"
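  // A minimal sketch of what area_db.properties might contain, inferred from the
  // commented-out constants above. The exact key names depend on how DbUtils
  // parses the file, so treat these as assumptions:
  //
  //   driver=com.mysql.jdbc.Driver
  //   url=jdbc:mysql://gisaosdw-m.db.sfcloud.local:3306/aosdw?useSSL=false&useUnicode=true&characterEncoding=utf8mb4&autoReconnect=true&failOverReadOnly=false&useOldAliasMetadataBehavior=true
  //   username=aosdw
  //   password=@aosdw123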
  def startSta(spark: SparkSession, inc_day: String): Unit = {
    import spark.implicits._

    // City-to-area mapping, loaded from a header CSV with columns
    // city_code, area_code, area_name.
    val citymap = spark.read.format("csv")
      .option("header", "true")
      .load("/user/01401062/upload/gis/project/test/citymap.csv")
      .map(x => {
        val city_code = x.getString(0)
        val area_code = x.getString(1)
        val area_name = x.getString(2)
        CityToCode(city_code, area_code, area_name)
      }).persist(StorageLevel.MEMORY_AND_DISK_SER)
    citymap.take(2).foreach(println(_))
    citymap.toDF().createOrReplaceTempView("cityToCode")

    val calSql =
      s"""
         |select
         |  citymap.area_code, citymap.area_name,
         |  city, cityCode, deptcode, barscantm, zonecode, addr,
         |  phone, mobile, baroprcode, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
         |  arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, sysOrderNo
         |from
         |(
         |  select
         |    city, cityCode, deptcode, '-' barscantm, '-' zonecode, addr,
         |    phone, mobile, '-' baroprcode, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
         |    arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, sysOrderNo
         |  from
         |  (
         |    select
         |      city, cityCode, deptcode, concat(province, city, county, address) addr,
         |      phone, mobile, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
         |      arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, omsNon.sysOrderNo sysOrderNo
         |    from
         |    (
         |      select
         |        dataType, sysOrderNo, cityCode, city, address, finalAoiCode, order_no, execution_aoi_id,
         |        batch_code_execute, act_time, distribute_code, act_code, dest_dept_code, real_distribute_time,
         |        execution_date, final_batch_code, batchresult_body, final_batch_code1, batchresult_body1,
         |        final_batch_code2, batchresult_body2, allaoilist, empnolist, is_distribute_receipt,
         |        province, county, phone, mobile, company, teamcode
         |      from dm_gis.aoi_fc_for_area
         |      where inc_day = '${inc_day}'
         |        and dataType = 'non'
         |    ) omsNon
         |    left outer join
         |    (
         |      select
         |        sysOrderNo, src,
         |        if(asyncdeptcode = '', syncdeptcode, asyncdeptcode) asyncdeptcode,
         |        if(asyncteamcode = '', syncteamcode, asyncteamcode) asyncteamcode,
         |        if(asyncgroupid = '', syncgroupid, asyncgroupid) asyncgroupid,
         |        syncreqdatetime, arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, deptcode
         |      from dm_gis.gis_rds_omsfrom
         |      where inc_day = '${inc_day}'
         |    ) omsfrom
         |    on omsNon.sysOrderNo = omsfrom.sysOrderNo
         |  ) oms
         |) res
         |left outer join
         |(
         |  select city_code, area_code, area_name
         |  from cityToCode
         |) citymap
         |on res.cityCode = citymap.city_code
      """.stripMargin
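    // Query shape, from the inside out:
    //   1. omsNon   -- dm_gis.aoi_fc_for_area rows for inc_day with dataType='non' (orders without an AOI match);
    //   2. omsfrom  -- dm_gis.gis_rds_omsfrom routing fields, with each async* code falling back to its sync* counterpart;
    //   3. left join the two on sysOrderNo, then left join cityToCode to attach area_code/area_name per cityCode.
    // barscantm/zonecode/baroprcode are '-' placeholders in this version; the commented
    // variant below filled them from waybill scan events instead.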
    // Earlier variant of calSql, kept commented out: instead of '-' placeholders it
    // pulled barscantm/zonecode/baroprcode from the latest opcode-54 scan per waybill
    // in ods_kafka_fvp.fvp_core_fact_route_op.
    //val calSql =
    //  s"""
    //     |select
    //     |  citymap.area_code, citymap.area_name,
    //     |  city, cityCode, deptcode, barscantm, zonecode, addr,
    //     |  phone, mobile, baroprcode, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
    //     |  arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime
    //     |from
    //     |(
    //     |  select
    //     |    city, cityCode, deptcode, fvp.barscantm barscantm, fvp.zonecode zonecode, addr,
    //     |    phone, mobile, fvp.baroprcode baroprcode, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
    //     |    arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime
    //     |  from
    //     |  (
    //     |    select
    //     |      omsNon.sysOrderNo,
    //     |      city, cityCode, deptcode, concat(province, city, county, address) addr,
    //     |      phone, mobile, src, asyncdeptcode, asyncteamcode, asyncgroupid, syncreqdatetime,
    //     |      arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime
    //     |    from
    //     |    (
    //     |      select
    //     |        dataType, sysOrderNo, cityCode, city, address, finalAoiCode, order_no, execution_aoi_id,
    //     |        batch_code_execute, act_time, distribute_code, act_code, dest_dept_code, real_distribute_time,
    //     |        execution_date, final_batch_code, batchresult_body, final_batch_code1, batchresult_body1,
    //     |        final_batch_code2, batchresult_body2, allaoilist, empnolist, is_distribute_receipt,
    //     |        province, county, phone, mobile, company, teamcode
    //     |      from dm_gis.aoi_fc_for_area
    //     |      where inc_day = '${inc_day}'
    //     |        and dataType = 'non'
    //     |    ) omsNon
    //     |    left outer join
    //     |    (
    //     |      select
    //     |        sysOrderNo, src,
    //     |        if(asyncdeptcode = '', syncdeptcode, asyncdeptcode) asyncdeptcode,
    //     |        if(asyncteamcode = '', syncteamcode, asyncteamcode) asyncteamcode,
    //     |        if(asyncgroupid = '', syncgroupid, asyncgroupid) asyncgroupid,
    //     |        syncreqdatetime, arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, deptcode
    //     |      from dm_gis.gis_rds_omsfrom
    //     |      where inc_day = '${inc_day}'
    //     |    ) omsfrom
    //     |    on omsNon.sysOrderNo = omsfrom.sysOrderNo
    //     |  ) oms
    //     |  left outer join
    //     |  (
    //     |    select * from (
    //     |      select mainWaybillno, zoneCode, baroprcode, barscantm,
    //     |        row_number() over (partition by mainWaybillno order by barscantm desc) as rank
    //     |      from ods_kafka_fvp.fvp_core_fact_route_op
    //     |      where inc_day = '${inc_day}' and opcode = '54' and mainwaybillno <> ''
    //     |    ) b where b.rank = 1
    //     |  ) fvp
    //     |  on oms.sysOrderNo = fvp.mainwaybillno
    //     |) res
    //     |left outer join
    //     |(
    //     |  select city_code, area_code, area_name
    //     |  from cityToCode
    //     |) citymap
    //     |on res.cityCode = citymap.city_code
    //  """.stripMargin

    val frame = spark.sql(calSql).toDF()
      .withColumn("inc_day", lit(inc_day))
      .persist(StorageLevel.MEMORY_AND_DISK_SER)
    frame.take(2).foreach(println(_))
    frame.write.mode(SaveMode.Overwrite).insertInto("dm_gis.aoi_area_stashou_undistinguish")
    logger.error("Total rows written to Hive: " + frame.count())

    val frameToMysql = frame.rdd.map(x => {
      var area = x.getString(0)
      if (!StringUtils.nonEmpty(area)) {
        area = "-"
      }
      var region = x.getString(1)
      if (!StringUtils.nonEmpty(region)) {
        region = "-"
      }
      val city = x.getString(2)
      val citycode = x.getString(3)
      val deptcode = x.getString(4)
      val barscantm = "2000-00-00 00:00:00.00" // sentinel; column 5 is the '-' placeholder and is skipped
      val zonecode = x.getString(6)
      val address = x.getString(7)
      val phone = x.getString(8)
      val mobile = x.getString(9)
      val company = ""
      val baroprcode = x.getString(10)
      val src = x.getString(11)
      val finalzc = x.getString(12)
      val finaltc = x.getString(13)
      val group_id = x.getString(14)
      var syncreqdatetime = "2000-00-00 00:00:00.00"
      if (StringUtils.nonEmpty(x.getString(15)) && x.getString(15).length > 19) {
        syncreqdatetime = x.getString(15).substring(0, 19).concat(".00")
      }
      val arssdeptcode = x.getString(16)
      val arssteamcode = x.getString(17)
      val arssaoicode = x.getString(18)
      val arssempcode = x.getString(19)
      var arssredatatime = "2000-00-00 00:00:00.00"
      if (StringUtils.nonEmpty(x.getString(20)) && x.getString(20).length > 19) {
        arssredatatime = x.getString(20).substring(0, 19).concat(".00")
      }
      val sysOrderNo = x.getString(21)
      val inc_day = x.getString(22)
      // uid is the MD5 of a salted field concat; the random int keeps reruns from colliding.
      var uid = Array(citycode, address, region, src, syncreqdatetime, arssempcode,
        (new Random).nextInt(1000000000), inc_day).mkString
      uid = MessageDigest.getInstance("MD5").digest(uid.getBytes).map("%02x".format(_)).mkString
      AreaStaShou(uid, area, region, city, citycode, deptcode, zonecode, barscantm, address, company,
        phone, mobile, baroprcode, src, finalzc, finaltc, group_id, syncreqdatetime,
        arssdeptcode, arssteamcode, arssaoicode, arssempcode, arssredatatime, inc_day)
    })

    val emptyTeamCode = frameToMysql.filter(x => "".equals(x.arssteamcode))
    logger.error("emptyTeamCode row count: " + emptyTeamCode.count())
    val nonEmptyTeamCode = frameToMysql.filter(x => !"".equals(x.arssteamcode))
    logger.error("nonEmptyTeamCode row count: " + nonEmptyTeamCode.count())

    saveDB(spark, emptyTeamCode, inc_day, DB_TABLE_NAME_No)
    saveDB(spark, nonEmptyTeamCode, inc_day, DB_TABLE_NAME_Has)
  }
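  // MySQL load strategy: rows are split on arssteamcode (empty vs non-empty) into two
  // tables, and each save first deletes that table's rows for inc_day over the same
  // connection, so the job can be rerun for a day without duplicating data.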
  def deleteCurrentDateExistData(tableName: String, date: String, conn: Connection): Unit = {
    //val conn: Connection = DbUtils.getConnection(APPLICATION_DB)
    val sqlText = s"delete from $tableName where inc_day = '$date'"
    logger.error(">>>>>>Deleting the current day's rows before saving, Delete Sql Text: " + sqlText)
    JdbcTemplateUtil.executeSql(conn, sqlText, null)
    // The shared connection is intentionally not closed here; saveDB reuses it for the inserts.
    if (!Objects.isNull(conn)) {
      logger.error(">>>>>>Close Delete Operation Of DataSource Connection......" + conn.isClosed)
    }
  }

  def saveDB(spark: SparkSession, rdd: RDD[AreaStaShou], date: String, DB_TABLE_NAME: String): Unit = {
    try {
      // Delete the current day's rows first, so reruns do not duplicate data.
      val conn: Connection = DbUtils.getConnection(area_db_file)
      deleteCurrentDateExistData(DB_TABLE_NAME, date, conn)
      val fields = Array("uid", "area", "region", "city", "citycode", "deptcode", "zonecode", "barscantm",
        "address", "company", "phone", "mobile", "baroprcode", "src", "finalzc", "finaltc", "group_id",
        "syncreqdatetime", "arssdeptcode", "arssteamcode", "arssaoicode", "arssempcode", "arssredatatime", "inc_day")
      val fieldsMkString = fields.mkString(",")
      val sqlText = s"insert into $DB_TABLE_NAME ($fieldsMkString) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
      logger.error(">>>>>>Insert Into Sql Text: " + sqlText)
      logger.error(">>>>>>Connected to the database, " + sqlText)
      var count = 0
      var failedCount = 0
      // collect() pulls every row to the driver; the inserts then run one by one.
      rdd.collect().foreach(obj => {
        count = count + 1
        val detailArray: Array[Any] = Array(
          obj.uid, obj.area, obj.region, obj.city, obj.citycode, obj.deptcode, obj.zonecode, obj.barscantm,
          obj.address, obj.company, obj.phone, obj.mobile, obj.baroprcode, obj.src, obj.finalzc, obj.finaltc,
          obj.group_id, obj.syncreqdatetime, obj.arssdeptcode, obj.arssteamcode, obj.arssaoicode,
          obj.arssempcode, obj.arssredatatime, obj.inc_day
        )
        val value = JdbcTemplateUtil.executeSqlAnyParams(conn, sqlText, detailArray)
        if (value < 0) {
          failedCount = failedCount + 1
        }
      })
      logger.error(">>>>>>Total rows pushed to the DB: " + count)
      logger.error(">>>>>>Of which failed inserts: " + failedCount)
      logger.error(">>>>>>Finished pushing data to the DB......")
      if (!Objects.isNull(conn)) {
        conn.close()
        logger.error(">>>>>>Close RDD Collect ForeachPartition Operation Of DataSource Connection......" + conn.isClosed)
      }
    } catch {
      case e: Exception =>
        logger.error(">>>>>>Exception while loading no-AOI rows into the ops DB, Exception: " + e)
    }
  }
}
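saveDB collects the entire RDD to the driver and inserts row by row over a single JDBC connection. That is simple and keeps the delete-then-insert logic in one place, but it serializes all writes through one machine. For larger volumes, Spark's built-in JDBC writer pushes rows from the executors in parallel. A minimal sketch, assuming the URL and credentials from the commented-out constants above (the helper name is hypothetical, and the target table's columns must match the DataFrame's):

import java.util.Properties

import org.apache.spark.sql.{DataFrame, SaveMode}

// Hypothetical replacement for saveDB's insert loop: write a DataFrame to MySQL
// directly from the executors instead of collecting to the driver.
def saveViaJdbcWriter(df: DataFrame, table: String): Unit = {
  val props = new Properties()
  props.setProperty("user", "aosdw")            // assumed, from the commented constants
  props.setProperty("password", "@aosdw123")    // assumed, from the commented constants
  props.setProperty("driver", "com.mysql.jdbc.Driver")
  val url = "jdbc:mysql://gisaosdw-m.db.sfcloud.local:3306/aosdw?useSSL=false&useUnicode=true&characterEncoding=utf8mb4"
  // Append after the per-day delete (deleteCurrentDateExistData) has run, so a rerun
  // of the same inc_day does not duplicate rows.
  df.write.mode(SaveMode.Append).jdbc(url, table, props)
}

With import spark.implicits._ in scope, each RDD[AreaStaShou] split converts via toDF(), e.g. saveViaJdbcWriter(nonEmptyTeamCode.toDF(), DB_TABLE_NAME_Has).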
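To run the job for a single partition day, pass that day as the only program argument (main reads args(0)). A typical submit might look like the following, where the jar name and deploy mode are placeholders for your build and cluster; --master can be omitted since start() already hardcodes yarn:

spark-submit \
  --class com.fengtu.sparktest.aoiTest.AreaStaShouUnDistinguish \
  --deploy-mode cluster \
  sparktest-assembly.jar \
  20210201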