仅用作自己做笔记,保存代码
import org.apache.commons.lang.StringUtils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import scala.collection.mutable.ListBuffer
class DriveEventStatInDay extends BatchApplication {
def start(statConfigBC: Broadcast[StatConfig], spark: SparkSession, beginDate: String, endDate: String) = {
val dayListBuffer = getSearchTime(beginDate, endDate)
val statConfig = statConfigBC.value
spark.sql(s" use ${statConfig.hiveDB}")
dayListBuffer.foreach(timeTuple => {
val sqlBulider = new StringBuilder
sqlBulider.append(s"SELECT ${Constants.VIN}, ${Constants.MILEAGE}, ${Constants.TYPE}, ${Constants.BEHAVIORNAME}, ${Constants.FUELCONSUMPTION}, ")
sqlBulider.append(s"${Constants.STARTECUMILEAGE}, ${Constants.ENDMILEAGE}, ")
sqlBulider.append(s" ${Constants.STARTTIME}, ${Constants.ENDTIME}, ${Constants.STARTLONGITUDE}, ${Constants.STARTLATITUDE} FROM ${statConfig.behaviorDataTable} ")
sqlBulider.append(s" WHERE ${Constants.STARTTIME} >= ${timeTuple.beginTime} AND ${Constants.STARTTIME} <= ${timeTuple.endTime} ")
sqlBulider.append(s" AND ${Constants.TYPE} >= 4097 AND ${Constants.TYPE} <= 4112 AND ${Constants.BEHAVIORNAME} IS NOT NULL AND ${Constants.ENDTIME} > 0 ")
logInfo(s" ============xxxsql = ${sqlBulider.mkString}")
val dataFrame = spark.sql(sqlBulider.mkString)
val dataRdd = calculateBehaviorStat(dataFrame, statConfigBC, timeTuple.stat_day)
save2Mysql(dataRdd, statConfigBC)
})
}
private def calculateBehaviorStat(dataFrame: DataFrame, statConfigBC: Broadcast[StatConfig],
stat_day: String) = {
dataFrame.rdd.groupBy(_.getAs[String]("vin")).mapPartitions(parts => {
val jedis = statConfigBC.value.redisBusinessEndpoint.connect()
val resultListBuffer = new ListBuffer[DriveEventStatInDayBean]
parts.foreach(part => {
val vin = part._1
val vehicleInfo = jedis.get(Constants.JEDISPREFIX_VEHICLEINFO + vin)
val vehicleMap = string2Json(vehicleInfo)
var engine_type: String = null
var engine_no: String = null
var fuel_type: Int = 1
var use_type: String = null
var agency_id: Int = -1
var org_id: String = null
var vehicle_type: String = null
var org_rid: String = null
var province: String = null
var chassis_no: String = if (StringUtils.length(vin) >= 8)
StringUtils.substring(vin, StringUtils.length(vin) - 8) else null
if (null != vehicleMap) {
engine_type = if (null != vehicleMap.get(Constants.ENGINE_TYPE)) String.valueOf(vehicleMap.get(Constants.ENGINE_TYPE)) else null
engine_no = if (null != vehicleMap.get(Constants.ENGINE_NO)) String.valueOf(vehicleMap.get(Constants.ENGINE_NO)) else null
use_type = if (null != vehicleMap.get(Constants.USE_TYPE)) String.valueOf(vehicleMap.get(Constants.USE_TYPE)) else null
agency_id = if (null != vehicleMap.get(Constants.AGENCY_ID)) String.valueOf(vehicleMap.get(Constants.AGENCY_ID)).toInt else -1
fuel_type = if (null != vehicleMap.get(Constants.FUEL_TYPE)) String.valueOf(vehicleMap.get(Constants.FUEL_TYPE)).toInt else 1
org_id = if (null != vehicleMap.get(Constants.ORG_ID)) String.valueOf(vehicleMap.get(Constants.ORG_ID)) else null
vehicle_type = if (null != vehicleMap.get(Constants.VEHICLE_TYPE_NAME)) String.valueOf(vehicleMap.get(Constants.VEHICLE_TYPE_NAME)) else null
chassis_no = if (null != vehicleMap.get(Constants.CHASSIS_NO)) String.valueOf(vehicleMap.get(Constants.CHASSIS_NO)) else chassis_no
org_rid = if (null != vehicleMap.get(Constants.CUSTOMERORGANIZATIONRID)) String.valueOf(vehicleMap.get(Constants.CUSTOMERORGANIZATIONRID)) else null
}
val dataList = part._2.toList.sortBy(_.getAs[Long](Constants.STARTTIME))
var faultCntMap = Map.empty[String, DriveEventStatInDayBean]
var firstTime: Long = 0
var curBeginTime: Long = 0
var curEndTime: Long = 0
var curBeginMile: Double = 0
var curEndMile: Double = 0
var firstMile: Double = 0
dataList.foreach(data => {
val driveEventCode = data.getAs[Int](Constants.TYPE)
val driveEventName = data.getAs[String](Constants.BEHAVIORNAME)
curBeginTime = data.getAs[Long](Constants.STARTTIME)
curEndTime = data.getAs[Long](Constants.ENDTIME)
if (0 >= firstTime) {
firstTime = curBeginTime
}
curBeginMile = data.getAs[Double](Constants.STARTECUMILEAGE)
curEndMile = data.getAs[Double](Constants.ENDMILEAGE)
if (0 >= firstMile) {
firstMile = curBeginMile
}
val seconds = (curEndTime - curBeginTime) / 1000
val miles = data.getAs[Double](Constants.MILEAGE)
val fuels = data.getAs[Double](Constants.FUELCONSUMPTION)
//计算省份
val lon = data.getAs[Double](Constants.STARTLONGITUDE)
val lat = data.getAs[Double](Constants.STARTLATITUDE)
province = NationalAeraGeoPoints.getAreaName(lon, lat)
val key = String.format("%s-%s", String.valueOf(driveEventCode), province)
if (faultCntMap.get(key).isEmpty) {
val bean = new DriveEventStatInDayBean
//bean.id = UUID.randomUUID().toString.replaceAll("-", "")
bean.id = vin + stat_day + driveEventCode
bean.vin = vin
bean.chassis_no = chassis_no
bean.agency_id = agency_id
bean.engine_type = engine_type
bean.engine_no = engine_no
bean.org_id = org_id
bean.use_type = use_type
bean.vehicle_type = vehicle_type
bean.province = province
bean.org_rid = org_rid
bean.stat_day = stat_day
bean.cnt = 1
bean.miles = miles
bean.fuels = fuels
bean.seconds = seconds
bean.driveevent_code = driveEventCode
bean.driveevent_name = driveEventName
bean.begin_happen_time = firstTime
bean.end_happen_time = curEndTime
bean.begin_happen_mile = curBeginMile
bean.end_happen_mile = curEndMile
faultCntMap += (key -> bean)
} else {
faultCntMap.get(key).get.cnt += 1
faultCntMap.get(key).get.miles += miles
faultCntMap.get(key).get.fuels += fuels
faultCntMap.get(key).get.seconds += seconds
faultCntMap.get(key).get.end_happen_time = curEndTime
faultCntMap.get(key).get.end_happen_mile = curEndMile
}
})
faultCntMap.foreach(data => resultListBuffer += data._2)
})
jedis.close()
resultListBuffer.toIterator
})
}
private def saveResult(spark: SparkSession, rdd: RDD[DriveEventStatInDayBean],
statConfig: StatConfig): Unit = {
val connectionProperties = statConfig.getConnectionProperties()
spark.createDataFrame(rdd, classOf[DriveEventStatInDayBean]).write.mode(SaveMode.Append)
.jdbc(connectionProperties._1, statConfig.behaviorStatIndayTable, connectionProperties._2)
}
private def save2Mysql(rdd: RDD[DriveEventStatInDayBean], applicationConfigBC: Broadcast[StatConfig]) = {
rdd.foreachPartition(part => {
if (part.nonEmpty) {
val config = applicationConfigBC.value
val table = config.behaviorStatIndayTable
//val table = "test_lqcv_behavior_stat_inday"
val sql = s" INSERT INTO ${table} ( " +
s"id,vin,stat_day,engine_type,use_type," +
s"vehicle_type,province,season,driveevent_code,driveevent_name," +
s"seconds,fuels,miles,cnt,begin_happen_time," +
s"begin_happen_mile,end_happen_time,end_happen_mile,chassis_no,agency_id," +
s"org_id,fuel_type,engine_no,org_rid" +
s" ) " +
s"VALUES (" +
s"?, ?, ?, ?, ?, " +
s"?, ?, ?, ?, ?, " +
s"?, ?, ?, ?, ?, " +
s"?, ?, ?, ?, ?, " +
s"?, ?, ?, ? ) ON DUPLICATE KEY UPDATE " +
s"cnt = VALUES( cnt ),miles = VALUES( miles ),fuels = VALUES( fuels ),seconds = VALUES( seconds )," +
s"end_happen_time = VALUES( end_happen_time ),begin_happen_mile = VALUES( begin_happen_mile )"
val connection = config.getConnection()
connection.setAutoCommit(false)
val statement = connection.prepareStatement(sql)
part.foreach(record => {
try {
statement.setString(1, record.id)
statement.setString(2, record.vin)
statement.setString(3, record.stat_day)
statement.setString(4, record.engine_type)
statement.setString(5, record.use_type)
statement.setString(6, record.vehicle_type)
statement.setString(7, record.province)
statement.setString(8, record.season)
statement.setInt(9, record.driveevent_code)
statement.setString(10, record.driveevent_name)
statement.setLong(11, record.seconds)
statement.setDouble(12, record.fuels)
statement.setInt(13, record.miles.toInt)
statement.setDouble(14, record.cnt)
statement.setDouble(15, record.begin_happen_time)
statement.setDouble(16, record.begin_happen_mile)
statement.setLong(17, record.end_happen_time)
statement.setDouble(18, record.end_happen_mile)
statement.setString(19, record.chassis_no)
statement.setInt(20, record.agency_id)
statement.setString(21, record.org_id)
statement.setInt(22, record.fuel_type)
statement.setString(23, record.engine_no)
statement.setString(24, record.org_rid)
statement.addBatch()
} catch {
case e: Exception => {
logError("每天统计数据报错", e)
}
}
})
statement.executeBatch()
connection.commit()
statement.close()
config.closeConnection(connection)
}
})
}
}
object DriveEventStatInDay extends Logging {
def main(args: Array[String]): Unit = {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("com.dfssi").setLevel(Level.INFO)
Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
val lines = BatchApplication.parseArgs("DriveEventStatInDay", args)
val configPath = lines.getOptionValue("configPath")
val configName = lines.getOptionValue("configName", "stat_config.xml")
val appName = lines.getOptionValue("appName", "DriveEventStatInDay")
val beginDate = lines.getOptionValue("beginDate")
val endDate = lines.getOptionValue("endDate")
//val beginDate = "20200214"
//val endDate = "20200214"
logInfo(" 任务启动配置如下 : ")
logInfo(s" appName : $appName ")
logInfo(s" configPath : $configPath ")
logInfo(s" configName : $configName ")
logInfo(s" beginDate : $beginDate ")
logInfo(s" endDate : $endDate ")
new DriveEventStatInDay().execute(appName, configName, configPath, beginDate, endDate)
}
}