test code

/**
 * Computes which steps of an ordered funnel were reached by a set of timestamped events.
 *
 * Events are processed in ascending timestamp order. An event for step 0 always
 * (re)starts the window. An event for step `i > 0` counts only if step `i - 1` has
 * a recorded window start and the event's timestamp is less than `wd` after that
 * start; otherwise, if the previous step was recorded but the window has expired,
 * all recorded window starts are cleared. Processing stops as soon as the last step
 * has been reached.
 *
 * @param data events as `(stepName, timestamp)` pairs, in any order; events whose
 *             name is not listed in `rt` are ignored
 * @param rt   funnel step names in funnel order; if a name is repeated, its first
 *             position is used
 * @param wd   window length, in the same unit as the timestamps
 * @return one flag per entry of `rt`, `true` if that step was reached; an empty
 *         array when `rt` is empty
 */
def taskcal(data: Array[(String, Long)], rt: Array[String], wd: Int): Array[Boolean] = {
  val reached = Array.fill[Boolean](rt.length)(false)
  if (rt.nonEmpty) {
    val lastStep = rt.length - 1
    // Reversing before toMap makes the first occurrence of a duplicated name win,
    // which matches what rt.indexOf would return, without the quadratic scan.
    val stepIndex: Map[String, Int] = rt.zipWithIndex.reverse.toMap
    // Window start carried along the funnel; 0 is the "not recorded" sentinel.
    val windowStart = Array.fill[Long](rt.length)(0L)
    val events = data.sortBy(_._2).iterator
    while (!reached(lastStep) && events.hasNext) {
      val (name, time) = events.next()
      // Events outside the funnel definition are skipped instead of throwing.
      stepIndex.get(name).foreach { step =>
        if (step == 0) {
          windowStart(0) = time
          reached(0) = true
        } else if (windowStart(step - 1) != 0L) {
          if (time - windowStart(step - 1) < wd) {
            // Propagate the original window start so later steps are measured from it.
            windowStart(step) = windowStart(step - 1)
            reached(step) = true
          } else {
            // Window expired: forget every recorded start; reached flags are kept.
            java.util.Arrays.fill(windowStart, 0L)
          }
        }
      }
    }
  }
  reached
}

/**
 * Entry point: runs `taskcal` on a small hard-coded sample and prints the
 * resulting flags joined by commas.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val windowSize = 3600000
  val funnelSteps = Array("A", "B", "C")
  val sampleEvents = Array(
    ("A", 1450000000000L),
    ("B", 1450000000001L),
    ("C", 1430000000002L),
    ("A", 1460000000001L)
  )
  val flags = taskcal(sampleEvents, funnelSteps, windowSize)
  println(flags.mkString(","))
  // Disabled benchmark hook: bench("r",100000,taskcal(data,rt,wd))
}


/**
 * Runs a by-name block `count` times and prints the wall-clock time it took.
 *
 * @param name  label printed alongside the measurement
 * @param count number of times `f` is evaluated; zero or negative runs nothing
 * @param f     block to evaluate; re-evaluated on every iteration because it is by-name
 */
def bench(name: String, count: Int, f: => Unit): Unit = {
  val begin = System.currentTimeMillis()
  // `until` gives exactly `count` iterations; `0 to count` ran one extra.
  for (_ <- 0 until count) f
  val end = System.currentTimeMillis()
  // The last field is elapsed milliseconds; it was previously mislabelled as a second "count".
  println(s"name : ${name} count: $count elapsedMs:${end - begin} ")
}


// Streaming job body: reads comma-separated text lines from a socket and appends
// them to a partitioned Parquet dataset. NOTE(review): the enclosing method header
// is not visible in this excerpt.

// Output location passed to save() below.
val storageDir = "UserBehaviorDStream"
val sparkConf = new SparkConf().setAppName("NetworkWordCount")
// Batch interval is Seconds(10).
val ssc = new StreamingContext(sparkConf, Seconds(10))
// Text source: socket on localhost:9999.
val lines = ssc.socketTextStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK_SER)
val sqlContext = new SQLContext(ssc.sparkContext)
// UDF wrapping FunnelUtil.getDatePartiton; presumably derives a yyyyMMddHH value
// from eventTime — TODO confirm against FunnelUtil.
val getPartitionDate = udf(FunnelUtil.getDatePartiton _)
lines.foreachRDD(rdd => {
// Print each raw line, split it on commas, keep only records with exactly 6 fields,
// and turn each into a 6-column Row.
val userBehiviorData = rdd.map(x => {println("**:"+x);x.split(",")}).filter(_.length == 6)
.map(r => RowFactory.create(r(0), r(1), r(2), r(3), r(4), r(5)))
// Apply the schema from getStructType.
val userBehiviorDataDF = sqlContext.createDataFrame(userBehiviorData, getStructType)
// Add the partition column computed from eventTime, then coalesce to one partition.
val userBehiviorDataPartition = userBehiviorDataDF.withColumn("yyyyMMddHH", getPartitionDate(userBehiviorDataDF("eventTime"))).coalesce(1)
// Append as Parquet, partitioned by the yyyyMMddHH column.
userBehiviorDataPartition.write.format("parquet").mode("append").partitionBy("yyyyMMddHH").save(storageDir)
})
// Start the streaming computation and block until it terminates.
ssc.start()
ssc.awaitTermination()
}

/**
 * Schema for user-behavior records: six nullable string columns, in the order
 * userId, eventTime, itemId, itemName, eventAttribute, eventDate.
 */
val getStructType = {
  val columnNames = Seq("userId", "eventTime", "itemId", "itemName", "eventAttribute", "eventDate")
  // Every column is a nullable StringType field.
  val fields = columnNames.map(column => DataTypes.createStructField(column, DataTypes.StringType, true))
  DataTypes.createStructType(fields.toArray)
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值