1. 遇到的问题
$ sh altp.sh "2018-09-21 01:21:34" "2018-10-21 12:32:11"
传到 altp.sh里面后 会变成4个参数 传给jar的args
2018-09-21
01:21:34
2018-10-21
12:32:11
2.以不带空格的时间格式进行传参,然后在scala代码里面重新 转换为2018-10-21 12:32:11 这种标准格式
shell 传参
# 传参时间格式 yyyyMMddHHmmss(与 Scala 代码中 DateTimeFormat.forPattern("yyyyMMddHHmmss") 一致)
# 例如:$ sh altp.sh 20180921012134 20181021123211
spark-submit --master yarn-client --class com.gree.entrance.Begin --driver-memory 4g --num-executors 20 --executor-cores 4 --executor-memory 15g --driver-java-options '-Xms5g -Xmx5g -XX:+UseCompressedOops' --conf spark.driver.maxResultSize=2g --conf spark.executor.heartbeatInterval=15s --conf spark.yarn.executor.memoryOverhead=3072 --conf spark.memory.useLegacyMode=true --conf spark.shuffle.memoryFraction=0.5 --conf spark.storage.memoryFraction=0.3 --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:InitiatingHeapOccupancyPercent=25 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" --queue production /home/gree/msa/yw_msa/paiQiDiWenProject/airoutLowTempPro_autoDiagnostic.jar $1 $2
scala 转换
if (argsStr.length == 2) {
println("本次执行人工指定参数个数为2个")
val stemp = DateTime.parse(argsStr(0), DateTimeFormat.forPattern("yyyyMMddHHmmss"))
val stime = stemp.toString(DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss"))
val etemp = DateTime.parse(argsStr(1), DateTimeFormat.forPattern("yyyyMMddHHmmss"))
val etime = etemp.toString(DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss"))
println(stime)
println(etime)
//如果提交运行时传2个时间参数,则按照第一个参数的时间作为拉取本次数据源的开始时间,第二个作为结束时间
timeStampHisMaxTime = Timestamp.valueOf(stime)
timeStampCurrMaxTime = Timestamp.valueOf(etime)
spark与该脚本结合使用的例子
import java.sql.Timestamp
import com.gree.util.{DateUtil, DmSpark, MysqlConnTestToll}
import org.apache.spark.sql.{DataFrame, SaveMode}
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.storage.StorageLevel
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat
/**
 * Low-temperature auto-diagnostic batch job.
 *
 * Resolves the [start, end] time window of one run (from CLI args or from a
 * stored incremental watermark), pulls the indoor-unit and module tables for
 * that window, applies the fault-shielding rules and writes results to MySQL.
 * The watermark only advances when the diagnostic step reports success, so a
 * failed run is retried over the same window.
 *
 * CLI arguments use the compact yyyyMMddHHmmss format (no spaces) so they
 * survive shell word-splitting; they are converted back to the standard
 * "yyyy-MM-dd HH:mm:ss" form here.
 */
object Dologic1 {
  private val sc = DmSpark.getSC()
  private val sqlHiveContext = DmSpark.getSqlHiveContext()

  import sqlHiveContext.implicits._

  // Window over one device record: used to count how many indoor units are
  // powered on at the same (systemid, subdevid, recordtime).
  val w2 = Window.partitionBy('systemid, 'subdevid, 'recordtime) //.orderBy("minute")

  // Compact CLI pattern and the canonical timestamp pattern used everywhere else.
  private val argTimePattern = DateTimeFormat.forPattern("yyyyMMddHHmmss")
  private val stdTimePattern = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")

  /** Convert a compact CLI time argument (yyyyMMddHHmmss) to "yyyy-MM-dd HH:mm:ss". */
  private def parseArgTime(arg: String): String =
    DateTime.parse(arg, argTimePattern).toString(stdTimePattern)

  def main(argsStr: Array[String]): Unit = {
    // Load the three source tables plus the end time of this batch's window.
    val (idu, busi_mod, home_mod, currMaxTimeStr) = GetSource(argsStr)
    // Only advance the incremental watermark when the diagnostic succeeded.
    val isOk = dologic(idu, busi_mod, home_mod, currMaxTimeStr)
    if (isOk)
      increTime(currMaxTimeStr)
  }

  /**
   * Read the incremental watermark (max receive time already processed) for
   * `tableName` from gmv_gprs_new.altp_historymaxreceivetime, creating the
   * bookkeeping table on first use.
   *
   * @param tableName logical key identifying which pipeline's watermark to read
   * @return single-row DataFrame whose only column `maxLastTime` holds the
   *         watermark, or a hard-coded initial time when no row exists yet
   */
  def GetHistoryMaxTime(tableName: String): DataFrame = {
    sqlHiveContext.sql("create table if not exists " +
      "gmv_gprs_new.altp_historymaxreceivetime" +
      "(maxTime Timestamp,tableName String)")
    // Fallback watermark for the very first run (no rows for this key yet).
    val initailTime = "2018-08-18 23:59:59"
    val hisMaxTime = sqlHiveContext.table("gmv_gprs_new.altp_historymaxreceivetime")
      .filter('tableName === tableName)
      .agg((when(count('maxTime) === 0, Timestamp.valueOf(initailTime)) otherwise (max('maxTime))) as ("maxLastTime"))
      .distinct()
    hisMaxTime
  }

  /**
   * Resolve this batch's [start, end] window from the CLI arguments, then load
   * the three source tables restricted to that window.
   *
   * 0 args: incremental mode — start = stored watermark, end = max receivetime
   *         within the 3 days after the watermark (caps one batch's size).
   * 1 arg : end = the given time, start = stored watermark.
   * 2 args: explicit window — start = args(0), end = args(1).
   * Anything else: print usage hint and exit.
   *
   * @return (idu, busi_mod, home_mod, end-time string of this window)
   */
  def GetSource(argsStr: Array[String]): (DataFrame, DataFrame, DataFrame, String) = {
    // Placeholder defaults; every branch below overwrites all three.
    var currMaxTimeStr = "2018-09-25 00:00:00"
    var timeStampHisMaxTime = Timestamp.valueOf("2018-09-25 00:00:00")
    var timeStampCurrMaxTime = Timestamp.valueOf("2018-09-25 00:00:00")
    if (argsStr.length == 0) {
      println("本次执行人工指定参数个数为0个")
      // Incremental mode: start from the stored watermark.
      val tableName = "sheeldLogic"
      val hisMaxTimeDF = GetHistoryMaxTime(tableName)
      hisMaxTimeDF.cache()
      val hisMaxTimeStr = hisMaxTimeDF.collectAsList().get(0).get(0).toString
      timeStampHisMaxTime = Timestamp.valueOf(hisMaxTimeStr)
      // Cap one batch at 3 days past the watermark so a single run does not
      // pull too much data and fail. (Original comment said 2 days; the code
      // has always added 3.)
      val next3dayTimeStampHisMaxTime = DateUtil.getAddTime(timeStampHisMaxTime, +3 * 24 * 60 * 60)
      val currMaxTimeDF = sqlHiveContext.table("gree_monitorcenter_newgprs.msa_new_idu_data")
        .filter('receivetime > timeStampHisMaxTime && 'receivetime <= next3dayTimeStampHisMaxTime)
        .agg(max('receivetime) as ("maxTime")).distinct()
      currMaxTimeDF.cache()
      // Collect once; max(receivetime) is null when no new rows arrived.
      val headRow = currMaxTimeDF.collectAsList().get(0)
      if (headRow.anyNull) {
        println("没有新增数据 本次不进行诊断 !!")
        sys.exit(1)
      }
      currMaxTimeStr = headRow.get(0).toString
      timeStampCurrMaxTime = Timestamp.valueOf(currMaxTimeStr)
    } else if (argsStr.length == 2) {
      println("本次执行人工指定参数个数为2个")
      // Explicit [start, end] window supplied on the command line.
      val stime = parseArgTime(argsStr(0))
      val etime = parseArgTime(argsStr(1))
      println(stime)
      println(etime)
      timeStampHisMaxTime = Timestamp.valueOf(stime)
      timeStampCurrMaxTime = Timestamp.valueOf(etime)
      currMaxTimeStr = etime
    } else if (argsStr.length == 1) {
      println("本次执行人工指定参数个数为1个")
      val etime = parseArgTime(argsStr(0))
      println(etime)
      // End = given time; start = stored watermark.
      val tableName = "altp_autodg_before"
      val hisMaxTimeDF = GetHistoryMaxTime(tableName)
      hisMaxTimeDF.cache()
      val hisMaxTimeStr = hisMaxTimeDF.collectAsList().get(0).get(0).toString
      timeStampHisMaxTime = Timestamp.valueOf(hisMaxTimeStr)
      timeStampCurrMaxTime = Timestamp.valueOf(etime)
      currMaxTimeStr = etime
    } else {
      println("本次执行人工指定参数个数大于2个!")
      println("需要 <=2 个参数 !!")
      sys.exit(1)
    }
    println("历史最大时间" + timeStampHisMaxTime + "当前最大时间:" + timeStampCurrMaxTime)
    // Indoor-unit on/off status within the window.
    val idu = sqlHiveContext.table("gree_monitorcenter_newgprs.msa_new_idu_data")
      .filter('receivetime > timeStampHisMaxTime && 'receivetime <= timeStampCurrMaxTime)
      .select('systemid, 'subdevid, 'recordtime, 'OnOffSta)
      .withColumnRenamed("subdevid", "in_subdevid")
      .distinct()
    // Commercial module table. Shielding rules applied later in dologic:
    // 1) at the same fault time, if all indoor units are off and the
    //    compressor-1 driver-reset fault bit is 1, the fault is not counted;
    // 2) at the same fault time, if some unit is on, compressor-1 target
    //    frequency is 0 and compressor-1 program state is 2, not counted.
    val busi_mod = sqlHiveContext.table("gree_monitorcenter_newgprs.msa_new_busi_mo")
      .filter('receivetime > timeStampHisMaxTime && 'receivetime <= timeStampCurrMaxTime)
      .select('systemid, 'subdevid, 'recordtime, 'Comp1DriResetErr, 'Comp2DriResetErr, 'Fan1DriResetSta, 'Fan2DriResetErr,
        'Comp1AimHz, 'Comp2AimHz, 'Fan1AimHz, 'Fan2AimHz,
        'Comp1ProgramSta, 'Comp2ProgramSta, 'Fan1ProgramSta, 'Fan2ProgramSta, 'Comp1DriIPMErr, 'Comp2DriIPMErr)
      .distinct()
    // Residential module table — same rules; it lacks Comp2ProgramSta,
    // Fan1ProgramSta and Fan2ProgramSta.
    // NOTE(review): unlike busi_mod, no .distinct() here — confirm intentional.
    val home_mod = sqlHiveContext.table("gree_monitorcenter_newgprs.msa_new_home_mo")
      .filter('receivetime > timeStampHisMaxTime && 'receivetime <= timeStampCurrMaxTime)
      .select('systemid, 'subdevid, 'recordtime, 'Comp1DriResetErr, 'Comp2DriResetErr, 'Fan1DriResetSta, 'Fan2DriResetErr,
        'Comp1AimHz, 'Comp2AimHz, 'Fan1AimHz, 'Fan2AimHz,
        'Comp1ProgramSta, 'Comp1DriIPMErr, 'Comp2DriIPMErr)
    (idu, busi_mod, home_mod, currMaxTimeStr)
  }

  /**
   * Join indoor-unit on/off status onto both module tables, count powered-on
   * units per (systemid, subdevid, recordtime), apply the commercial shielding
   * rules and persist the result to MySQL.
   *
   * @return true when the diagnostic completed, allowing the watermark to advance
   */
  def dologic(idu: DataFrame, busi_mod: DataFrame, home_mod: DataFrame, currMaxTimeStr: String): Boolean = {
    idu.persist(StorageLevel.MEMORY_AND_DISK)
    // kaiji_num = number of powered-on indoor units at this device/time key.
    val busi_mod_tmp = idu.join(busi_mod, idu("systemid") === busi_mod("systemid") && idu("recordtime") === busi_mod("recordtime")).select(idu("in_subdevid"), idu("OnOffSta"), busi_mod("*"))
      .withColumn("kaiji_num", sum('OnOffSta).over(w2))
      .dropDuplicates(Array("systemid", "subdevid", "recordtime"))
    busi_mod_tmp.persist(StorageLevel.MEMORY_AND_DISK)
    // NOTE(review): home_mod_tmp is built and persisted but not used below —
    // presumably the residential branch is unfinished; confirm before removing.
    val home_mod_tmp = idu.join(home_mod, idu("systemid") === home_mod("systemid") && idu("recordtime") === home_mod("recordtime")).select(idu("in_subdevid"), idu("OnOffSta"), home_mod("*"))
      .withColumn("kaiji_num", sum('OnOffSta).over(w2))
      .dropDuplicates(Array("systemid", "subdevid", "recordtime"))
    home_mod_tmp.persist(StorageLevel.MEMORY_AND_DISK)
    // Commercial units: keep rows matching either shielding rule.
    val bres1 = busi_mod_tmp
      .filter(('kaiji_num === 0 && 'Comp1DriResetErr === 1) || ('kaiji_num > 0 && 'Comp1AimHz === 0 && 'Comp1ProgramSta === 2))
      .withColumnRenamed("recordtime", "errortime")
      .select('systemid, 'subdevid, 'errortime, 'kaiji_num, 'Comp1DriResetErr, 'Comp1AimHz, 'Comp1ProgramSta)
    MysqlConnTestToll.resultToMYsql(bres1, "CompAndFanSheild_help")
    true
  }

  /**
   * Append this batch's end time to the incremental watermark table so the
   * next incremental run starts after it.
   */
  def increTime(currMaxTimeStr: String): Unit = {
    // Build the single watermark row directly. The original routed a dummy
    // DataFrame through a when/otherwise aggregation whose two branches were
    // identical (both yielded currMaxTimeStr) and assigned the Unit result of
    // .write to a val; this is the equivalent direct form. Schema matches the
    // target table: (maxTime Timestamp, tableName String).
    sqlHiveContext.createDataFrame(Seq((Timestamp.valueOf(currMaxTimeStr), "sheeldLogic")))
      .toDF("maxTime", "tableName")
      .write.mode(SaveMode.Append).saveAsTable("gmv_gprs_new.altp_historymaxreceivetime")
    println(DateTime.now().toString("yyyy-MM-dd HH:mm:ss"))
    println("---end---")
    println("done")
  }
}
// filter函数:非的过滤
// 条件:Comp1ProgramSta不等于105 且Comp1DriIPMErr等于1 或者kaiji_num等于0且Comp1DriIPMErr等于1
正确的filter:
.filter(('kaiji_num === 0 && 'Comp1DriIPMErr === 1) || (!('Comp1ProgramSta === 105)&&'Comp1DriIPMErr === 1 ))
错误的filter:
.filter(('kaiji_num === 0 && 'Comp1DriIPMErr === 1) || ('Comp1ProgramSta !== 105&&'Comp1DriIPMErr === 1 ))