//knn
/**
 * Sums the scores of identical (video, neighbour) pairs and keeps, for every video, at most
 * `calculateNumber` neighbours with the highest positive combined score.
 *
 * The upstream expression that produces the keyed scores is elided in this excerpt (the `xxx`
 * placeholder below). Judging from the visible pipeline it must yield `((id, neighbourId), score)`
 * pairs; presumably it merges `itemSim` and `contentSim` using `ContentSimWeight` -- TODO confirm.
 *
 * @param itemSim          similarity input, consumed by the elided part of the pipeline
 * @param contentSim       similarity input, consumed by the elided part of the pipeline
 * @param ContentSimWeight weight parameter, consumed by the elided part of the pipeline
 * @param calculateNumber  maximum number of neighbours kept per video; zero or a negative value
 *                         keeps none
 * @return one `VideoSim` per retained (video, neighbour) pair whose combined score is > 0
 */
def calculateUnionSimilarity(itemSim: RDD[VideoSimStandPer], contentSim: RDD[VideoSimStandPer], ContentSimWeight: Double, calculateNumber: Int): RDD[VideoSim] = {
xxx
.reduceByKey(_ + _)
.map { case ((id, neighbourId), score) => (id, (neighbourId, score)) }
.groupByKey()
.flatMap { case (id, neighbours) =>
  // Ascending sort + takeRight keeps the top-scoring entries without mutating a buffer.
  neighbours.toVector
    .sortBy(_._2)
    .takeRight(calculateNumber)
    .map { case (neighbourId, score) => (id, neighbourId, score) }
}
.filter(_._3 > 0)
.map { case (id, neighbourId, score) => VideoSim(id, neighbourId, score) }
}
/**
 * Scores every ordered pair of distinct videos with `myFunc` and keeps, for each video, its
 * best-scoring neighbours.
 *
 * Note that `cartesian` evaluates all n * n pairs, and both (a, b) and (b, a) are scored
 * separately, so `myFunc` does not have to be symmetric.
 *
 * @param video  (video id, feature set) pairs
 * @param myFunc similarity function applied to the two feature sets of a pair
 * @param topN   maximum number of neighbours kept per video; defaults to 100, the limit that
 *               was previously hard-coded
 * @return (video id, neighbour id, score) triples with score > 0, at most `topN` per video id
 */
def calculateSimilarity(video: RDD[(String, Set[String])], myFunc: (Set[String], Set[String]) => Double, topN: Int = 100): RDD[(String, String, Double)] = {
video.cartesian(video)
.filter { case ((leftId, _), (rightId, _)) => leftId != rightId }
.map { case ((leftId, leftTags), (rightId, rightTags)) => (leftId, rightId, myFunc(leftTags, rightTags)) }
.filter(_._3 > 0)
.groupBy(_._1)
.flatMap { case (_, scored) =>
  // Highest score first, then cut to the requested neighbour count.
  scored.toList.sortWith(_._3 > _._3).take(topN)
}
}
飞哥更改
object xxx {
//时间函数的一种创建方式
- private val today :String = TimeUtil.currentDay().toString
+ private val today: Int = TimeUtil.currentDay()
def main(args: Array[String]) {
for (dir <- collectDirs if dir.isDirectory) {
//xxx/yyyymmdd
val statDirs = fs.listStatus(dir.getPath)
- for (statDir <- statDirs if statDir.isDirectory && !today.equals(statDir.getPath.getName)) {
+ for (statDir <- statDirs if statDir.isDirectory && isInCompressDuration(statDir.getPath.getName)) {
val dirDays = fs.listStatus(statDir.getPath)
//isFile和getLen的方法的使用
//getPath.toString和getPath.getName方法的使用
for (file <- dirDays if file.isFile && file.getLen > 0) {
val name = file.getPath.toString
}
}
+ /**
+  * Tells whether a day-directory name falls inside the compression window: strictly before
+  * `today` and less than 30 calendar days old.
+  *
+  * Assumes `today` holds a `yyyymmdd` value in the same format as the directory names
+  * (the earlier revision compared it directly with the directory name) -- TODO confirm.
+  *
+  * @param date directory name, expected to be a `yyyymmdd` date
+  * @return true when `date` is a valid day inside the window; false otherwise, including
+  *         when `date` cannot be parsed (the failure is printed to stderr)
+  */
+ private def isInCompressDuration(date: String): Boolean = {
+   import java.time.LocalDate
+   import java.time.format.DateTimeFormatter
+   import scala.util.{Failure, Success, Try}
+   // Compare real calendar dates: adding 30 to a yyyymmdd integer is wrong across month
+   // boundaries (e.g. 20210925 + 30 = 20210955, which is not > 20211001).
+   val parseDay = (text: String) => LocalDate.parse(text, DateTimeFormatter.BASIC_ISO_DATE)
+   Try {
+     val day = parseDay(date)
+     val now = parseDay(today.toString)
+     day.isBefore(now) && day.plusDays(30).isAfter(now)
+   } match {
+     case Success(inWindow) => inWindow
+     case Failure(e) =>
+       // Names that are not valid dates are reported and treated as outside the window.
+       e.printStackTrace()
+       false
+   }
+ }
+
knn、listStatus、时间函数
最新推荐文章于 2021-10-13 21:38:13 发布