simhash实现

import com.clearspring.analytics.hash.MurmurHash

/**
  * SimHash fingerprinting over weighted string features.
  *
  * Originally created by fhqplzj on 17-3-1 at 6:07 PM.
  */
object Sim {
  /** Width of the fingerprint in bits (one bit per position of a `Long`). */
  private val HashBits = 64

  /**
    * Computes a 64-bit SimHash fingerprint of the given weighted features.
    *
    * Each feature is hashed to 64 bits with `MurmurHash.hash64`. For every bit
    * position the feature's weight is added to a per-bit tally when that bit of
    * the hash is set, and subtracted when it is clear. Bit `i` of the result is
    * set when the tally for position `i` is non-negative, so a tie (including
    * the case of no features at all) yields a set bit.
    *
    * @param features the feature strings to fingerprint
    * @param weights  the weight of each feature, paired with `features` by
    *                 position; if the arrays differ in length the surplus
    *                 elements of the longer one are ignored (`zip` semantics)
    * @return the 64-bit fingerprint
    */
  def simHash(features: Array[String], weights: Array[Int]): Long = {
    val hist = Array.ofDim[Int](HashBits)
    features.zip(weights).foreach { case (feature, weight) =>
      val hash = MurmurHash.hash64(feature)
      for (i <- 0 until HashBits) {
        // The mask has to be a Long: `1 << i` is an Int shift whose count is
        // taken modulo 32, so positions 32..63 would alias positions 0..31.
        if ((hash & (1L << i)) == 0L) {
          hist(i) -= weight
        } else {
          hist(i) += weight
        }
      }
    }
    // Assemble the fingerprint, again with a Long mask so the upper 32 bits
    // are set independently of the lower ones.
    (0 until HashBits).foldLeft(0L) { (acc, i) =>
      if (hist(i) >= 0) acc | (1L << i) else acc
    }
  }

  /** Demo entry point: prints the fingerprint of three equally weighted words. */
  def main(args: Array[String]): Unit = {
    val features = "zhao jun haha".split(" ")
    val weights = Array.fill(features.length)(1)
    println(simHash(features, weights))
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值