import com.clearspring.analytics.hash.MurmurHash
/**
 * SimHash fingerprinting utility.
 *
 * Originally created by fhqplzj on 17-3-1 at 6:07 PM.
 */
object Sim {

  /** Number of bits in the fingerprint; equals the width of the `Long` returned by `MurmurHash.hash64`. */
  private val HashBits = 64

  /**
   * Computes a 64-bit SimHash fingerprint from weighted features.
   *
   * Each feature is hashed with `MurmurHash.hash64`. For every bit position the
   * feature's weight is added to a per-bit counter when that bit of the hash is
   * set and subtracted when it is clear. Bit `i` of the result is set when the
   * counter for position `i` ends up `>= 0` (ties count as set, so an empty
   * input yields a value with all 64 bits set).
   *
   * `features` and `weights` are paired positionally with `zip`; if their
   * lengths differ, the surplus elements of the longer array are ignored.
   *
   * @param features the feature strings to fingerprint
   * @param weights  the weight of the feature at the same index
   * @return the 64-bit fingerprint
   */
  def simHash(features: Array[String], weights: Array[Int]): Long = {
    val hist = Array.ofDim[Int](HashBits)

    features.zip(weights).foreach { case (feature, weight) =>
      val hash = MurmurHash.hash64(feature)
      for (i <- 0 until HashBits) {
        // The mask must be a Long (1L): an Int shift only uses the low 5 bits of
        // the shift distance, so `1 << i` would wrap for i >= 32, and at i == 31
        // it would sign-extend into the whole upper half of the Long.
        if ((hash & (1L << i)) == 0) hist(i) -= weight
        else hist(i) += weight
      }
    }

    // Assemble the fingerprint one bit at a time, again with a Long mask.
    (0 until HashBits).foldLeft(0L) { (acc, i) =>
      if (hist(i) >= 0) acc | (1L << i) else acc
    }
  }

  /**
   * Demo entry point: prints the fingerprint of three sample tokens, each with weight 1.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val features = "zhao jun haha".split(" ")
    val weights = Array.fill(features.length)(1)
    println(simHash(features, weights))
  }
}
// SimHash implementation
// (Scraped page footer: latest recommended article published on 2024-08-23 09:34:55)