SparkSql 写数据库updata操作

仅用作自己做笔记,保存代码

import org.apache.commons.lang.StringUtils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

import scala.collection.mutable.ListBuffer

class DriveEventStatInDay extends BatchApplication {

  def start(statConfigBC: Broadcast[StatConfig], spark: SparkSession, beginDate: String, endDate: String) = {

    val dayListBuffer = getSearchTime(beginDate, endDate)

    val statConfig = statConfigBC.value
    spark.sql(s" use ${statConfig.hiveDB}")
    dayListBuffer.foreach(timeTuple => {
      val sqlBulider = new StringBuilder
      sqlBulider.append(s"SELECT ${Constants.VIN}, ${Constants.MILEAGE}, ${Constants.TYPE}, ${Constants.BEHAVIORNAME}, ${Constants.FUELCONSUMPTION}, ")
      sqlBulider.append(s"${Constants.STARTECUMILEAGE}, ${Constants.ENDMILEAGE}, ")
      sqlBulider.append(s" ${Constants.STARTTIME}, ${Constants.ENDTIME}, ${Constants.STARTLONGITUDE}, ${Constants.STARTLATITUDE} FROM ${statConfig.behaviorDataTable} ")
      sqlBulider.append(s" WHERE ${Constants.STARTTIME} >= ${timeTuple.beginTime} AND ${Constants.STARTTIME} <= ${timeTuple.endTime} ")
      sqlBulider.append(s" AND ${Constants.TYPE} >= 4097 AND ${Constants.TYPE} <= 4112 AND ${Constants.BEHAVIORNAME} IS NOT NULL AND ${Constants.ENDTIME} > 0 ")

      logInfo(s" ============xxxsql = ${sqlBulider.mkString}")
      val dataFrame = spark.sql(sqlBulider.mkString)

      val dataRdd = calculateBehaviorStat(dataFrame, statConfigBC, timeTuple.stat_day)

      save2Mysql(dataRdd, statConfigBC)

    })

  }

  private def calculateBehaviorStat(dataFrame: DataFrame, statConfigBC: Broadcast[StatConfig],
                                    stat_day: String) = {
    dataFrame.rdd.groupBy(_.getAs[String]("vin")).mapPartitions(parts => {
      val jedis = statConfigBC.value.redisBusinessEndpoint.connect()

      val resultListBuffer = new ListBuffer[DriveEventStatInDayBean]
      parts.foreach(part => {
        val vin = part._1

        val vehicleInfo = jedis.get(Constants.JEDISPREFIX_VEHICLEINFO + vin)
        val vehicleMap = string2Json(vehicleInfo)
        var engine_type: String = null
        var engine_no: String = null
        var fuel_type: Int = 1
        var use_type: String = null
        var agency_id: Int = -1
        var org_id: String = null
        var vehicle_type: String = null
        var org_rid: String = null
        var province: String = null
        var chassis_no: String = if (StringUtils.length(vin) >= 8)
          StringUtils.substring(vin, StringUtils.length(vin) - 8) else null

        if (null != vehicleMap) {
          engine_type = if (null != vehicleMap.get(Constants.ENGINE_TYPE)) String.valueOf(vehicleMap.get(Constants.ENGINE_TYPE)) else null
          engine_no = if (null != vehicleMap.get(Constants.ENGINE_NO)) String.valueOf(vehicleMap.get(Constants.ENGINE_NO)) else null
          use_type = if (null != vehicleMap.get(Constants.USE_TYPE)) String.valueOf(vehicleMap.get(Constants.USE_TYPE)) else null
          agency_id = if (null != vehicleMap.get(Constants.AGENCY_ID)) String.valueOf(vehicleMap.get(Constants.AGENCY_ID)).toInt else -1
          fuel_type = if (null != vehicleMap.get(Constants.FUEL_TYPE)) String.valueOf(vehicleMap.get(Constants.FUEL_TYPE)).toInt else 1
          org_id = if (null != vehicleMap.get(Constants.ORG_ID)) String.valueOf(vehicleMap.get(Constants.ORG_ID)) else null
          vehicle_type = if (null != vehicleMap.get(Constants.VEHICLE_TYPE_NAME)) String.valueOf(vehicleMap.get(Constants.VEHICLE_TYPE_NAME)) else null
          chassis_no = if (null != vehicleMap.get(Constants.CHASSIS_NO)) String.valueOf(vehicleMap.get(Constants.CHASSIS_NO)) else chassis_no
          org_rid = if (null != vehicleMap.get(Constants.CUSTOMERORGANIZATIONRID)) String.valueOf(vehicleMap.get(Constants.CUSTOMERORGANIZATIONRID)) else null
        }

        val dataList = part._2.toList.sortBy(_.getAs[Long](Constants.STARTTIME))
        var faultCntMap = Map.empty[String, DriveEventStatInDayBean]
        var firstTime: Long = 0
        var curBeginTime: Long = 0
        var curEndTime: Long = 0
        var curBeginMile: Double = 0
        var curEndMile: Double = 0
        var firstMile: Double = 0
        dataList.foreach(data => {
          val driveEventCode = data.getAs[Int](Constants.TYPE)
          val driveEventName = data.getAs[String](Constants.BEHAVIORNAME)

          curBeginTime = data.getAs[Long](Constants.STARTTIME)
          curEndTime = data.getAs[Long](Constants.ENDTIME)
          if (0 >= firstTime) {
            firstTime = curBeginTime
          }

          curBeginMile = data.getAs[Double](Constants.STARTECUMILEAGE)
          curEndMile = data.getAs[Double](Constants.ENDMILEAGE)
          if (0 >= firstMile) {
            firstMile = curBeginMile
          }

          val seconds = (curEndTime - curBeginTime) / 1000
          val miles = data.getAs[Double](Constants.MILEAGE)
          val fuels = data.getAs[Double](Constants.FUELCONSUMPTION)
          //计算省份
          val lon = data.getAs[Double](Constants.STARTLONGITUDE)
          val lat = data.getAs[Double](Constants.STARTLATITUDE)
          province = NationalAeraGeoPoints.getAreaName(lon, lat)

          val key = String.format("%s-%s", String.valueOf(driveEventCode), province)

          if (faultCntMap.get(key).isEmpty) {
            val bean = new DriveEventStatInDayBean
            //bean.id = UUID.randomUUID().toString.replaceAll("-", "")

            bean.id = vin + stat_day + driveEventCode
            bean.vin = vin
            bean.chassis_no = chassis_no
            bean.agency_id = agency_id
            bean.engine_type = engine_type
            bean.engine_no = engine_no
            bean.org_id = org_id
            bean.use_type = use_type
            bean.vehicle_type = vehicle_type
            bean.province = province
            bean.org_rid = org_rid
            bean.stat_day = stat_day
            bean.cnt = 1
            bean.miles = miles
            bean.fuels = fuels
            bean.seconds = seconds
            bean.driveevent_code = driveEventCode
            bean.driveevent_name = driveEventName
            bean.begin_happen_time = firstTime
            bean.end_happen_time = curEndTime
            bean.begin_happen_mile = curBeginMile
            bean.end_happen_mile = curEndMile

            faultCntMap += (key -> bean)
          } else {
            faultCntMap.get(key).get.cnt += 1
            faultCntMap.get(key).get.miles += miles
            faultCntMap.get(key).get.fuels += fuels
            faultCntMap.get(key).get.seconds += seconds
            faultCntMap.get(key).get.end_happen_time = curEndTime
            faultCntMap.get(key).get.end_happen_mile = curEndMile
          }

        })

        faultCntMap.foreach(data => resultListBuffer += data._2)
      })

      jedis.close()

      resultListBuffer.toIterator
    })
  }

  private def saveResult(spark: SparkSession, rdd: RDD[DriveEventStatInDayBean],
                         statConfig: StatConfig): Unit = {

    val connectionProperties = statConfig.getConnectionProperties()

    spark.createDataFrame(rdd, classOf[DriveEventStatInDayBean]).write.mode(SaveMode.Append)
      .jdbc(connectionProperties._1, statConfig.behaviorStatIndayTable, connectionProperties._2)
  }

  private def save2Mysql(rdd: RDD[DriveEventStatInDayBean], applicationConfigBC: Broadcast[StatConfig]) = {
    rdd.foreachPartition(part => {

      if (part.nonEmpty) {
        val config = applicationConfigBC.value
        val table = config.behaviorStatIndayTable
        //val table = "test_lqcv_behavior_stat_inday"

        val sql = s" INSERT INTO ${table} ( " +
          s"id,vin,stat_day,engine_type,use_type," +
          s"vehicle_type,province,season,driveevent_code,driveevent_name," +
          s"seconds,fuels,miles,cnt,begin_happen_time," +
          s"begin_happen_mile,end_happen_time,end_happen_mile,chassis_no,agency_id," +
          s"org_id,fuel_type,engine_no,org_rid" +
          s" ) " +
          s"VALUES (" +
          s"?, ?, ?, ?, ?, " +
          s"?, ?, ?, ?, ?, " +
          s"?, ?, ?, ?, ?, " +
          s"?, ?, ?, ?, ?, " +
          s"?, ?, ?, ? ) ON DUPLICATE KEY UPDATE " +
          s"cnt = VALUES( cnt ),miles = VALUES( miles ),fuels = VALUES( fuels ),seconds = VALUES( seconds )," +
          s"end_happen_time = VALUES( end_happen_time ),begin_happen_mile = VALUES( begin_happen_mile )"

        val connection = config.getConnection()
        connection.setAutoCommit(false)
        val statement = connection.prepareStatement(sql)

        part.foreach(record => {
          try {
            statement.setString(1, record.id)
            statement.setString(2, record.vin)
            statement.setString(3, record.stat_day)
            statement.setString(4, record.engine_type)
            statement.setString(5, record.use_type)
            statement.setString(6, record.vehicle_type)
            statement.setString(7, record.province)
            statement.setString(8, record.season)
            statement.setInt(9, record.driveevent_code)
            statement.setString(10, record.driveevent_name)
            statement.setLong(11, record.seconds)
            statement.setDouble(12, record.fuels)
            statement.setInt(13, record.miles.toInt)
            statement.setDouble(14, record.cnt)
            statement.setDouble(15, record.begin_happen_time)
            statement.setDouble(16, record.begin_happen_mile)
            statement.setLong(17, record.end_happen_time)
            statement.setDouble(18, record.end_happen_mile)
            statement.setString(19, record.chassis_no)
            statement.setInt(20, record.agency_id)
            statement.setString(21, record.org_id)
            statement.setInt(22, record.fuel_type)
            statement.setString(23, record.engine_no)
            statement.setString(24, record.org_rid)

            statement.addBatch()
          } catch {
            case e: Exception => {
              logError("每天统计数据报错", e)
            }
          }

        })
        statement.executeBatch()
        connection.commit()

        statement.close()
        config.closeConnection(connection)
      }
    })
  }

}

object DriveEventStatInDay extends Logging {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("com.dfssi").setLevel(Level.INFO)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val lines = BatchApplication.parseArgs("DriveEventStatInDay", args)
    val configPath = lines.getOptionValue("configPath")
    val configName = lines.getOptionValue("configName", "stat_config.xml")
    val appName = lines.getOptionValue("appName", "DriveEventStatInDay")
    val beginDate = lines.getOptionValue("beginDate")
    val endDate = lines.getOptionValue("endDate")

    //val beginDate = "20200214"
    //val endDate = "20200214"

    logInfo(" 任务启动配置如下 : ")
    logInfo(s" 		appName  :  $appName ")
    logInfo(s" 		configPath   :  $configPath ")
    logInfo(s" 		configName   :  $configName ")
    logInfo(s" 		beginDate   :  $beginDate ")
    logInfo(s" 		endDate   :  $endDate ")

    new DriveEventStatInDay().execute(appName, configName, configPath, beginDate, endDate)
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值