该
// 64-bit MurmurHash-style hash of the `len` bytes starting at `key`.
//
// key  - pointer to the input buffer; it is read through a uint64 pointer.
//        NOTE(review): this presumes the buffer is suitably aligned for
//        8-byte loads on the target platform - confirm with callers.
// len  - number of bytes to hash.
// seed - initial value mixed into the hash state.
//
// Returns the 64-bit hash value. Full 8-byte words are loaded in the host's
// native byte order, so the result depends on the platform's endianness.
uint64 MurmurHash64A(const void* key, int len, uint32 seed) {
  const uint64 m = 0xc6a4a7935bd1e995;
  const int r = 47;

  uint64 h = seed ^ (len * m);

  // Body: consume the input one 8-byte word at a time.
  const uint64* data = (const uint64*)key;
  const uint64* end = data + (len / 8);
  while (data != end) {
    uint64 k = *data++;

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  // Tail: fold in the remaining 0..7 bytes. Each byte must be widened to
  // 64 bits BEFORE shifting; otherwise shifts of 32 or more would act on a
  // promoted int. Every case deliberately falls through to the next one.
  const uint8* data2 = (const uint8*)data;
  switch (len & 7) {
    case 7: h ^= static_cast<uint64>(data2[6]) << 48;  // fall through
    case 6: h ^= static_cast<uint64>(data2[5]) << 40;  // fall through
    case 5: h ^= static_cast<uint64>(data2[4]) << 32;  // fall through
    case 4: h ^= static_cast<uint64>(data2[3]) << 24;  // fall through
    case 3: h ^= static_cast<uint64>(data2[2]) << 16;  // fall through
    case 2: h ^= static_cast<uint64>(data2[1]) << 8;   // fall through
    case 1: h ^= static_cast<uint64>(data2[0]);
            h *= m;
  }

  // Final mixing of the accumulated state.
  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}
使用PHP的实现如下:
/**
 * Computes the fingerprint of a string by hashing it with MurmurHash64A
 * using a fixed seed.
 *
 * @param string $str Input to fingerprint.
 * @return int Whatever MurmurHash64A returns for the input and the seed.
 */
function Fingerprint($str)
{
    // Fixed seed: every fingerprint is computed with the same value.
    $seed = 19820125;

    return MurmurHash64A($str, $seed);
}
/**
 * Converts a string into an array of its byte values.
 *
 * @param string $str Input string, treated as raw bytes.
 * @return int[] One integer in 0..255 per byte, in input order; an empty
 *               array for an empty string.
 */
function getBytes($str) {
    $len = strlen($str);
    $bytes = array();
    // The loop header was truncated in the original listing; walk every byte.
    for ($i = 0; $i < $len; $i++) {
        $bytes[] = ord($str[$i]);
    }
    return $bytes;
}
/**
 * Multiplies two integers by shift-and-add, using add64() for every
 * addition instead of PHP's `*` and `+` operators.
 *
 * @param int $x Multiplier; its 64 bits are examined one at a time.
 * @param int $y Multiplicand; shifted left by the index of each set bit.
 * @return int The accumulated result.
 */
function multi64($x, $y) {
    $product = 0;
    $shift = 0;
    while ($shift < 64) {
        // If bit $shift of $x is set, add $y shifted into that position.
        if (($x >> $shift) & 1) {
            $product = add64($product, $y << $shift);
        }
        $shift++;
    }
    return $product;
}
/**
 * Logical (zero-filling) right shift.
 *
 * PHP's `>>` copies the sign bit into the vacated positions for negative
 * numbers; this helper clears the top bit after the first one-bit step so
 * that the remaining shift is applied to a non-negative value.
 *
 * @param int $num Value to shift.
 * @param int $bit Number of positions; values <= 0 return $num unchanged.
 * @return int The shifted value.
 */
function r_shift($num, $bit) {
    if ($bit <= 0) {
        return $num;
    }
    if ($num > 0) {
        // Positive values need no special handling.
        return $num >> $bit;
    }
    // Shift once, drop the replicated sign bit, then finish the shift on
    // the now non-negative value.
    $cleared = ($num >> 1) & 0x7FFFFFFFFFFFFFFF;
    return $cleared >> ($bit - 1);
}
/**
 * Adds two integers using only bitwise operations (XOR for the partial sum,
 * AND plus a left shift for the carries), repeating until no carry remains.
 *
 * @param int $x First operand.
 * @param int $y Second operand.
 * @return int The bitwise-computed sum.
 */
function add64($x, $y) {
    $sum = $x ^ $y;      // bits that add without producing a carry
    $carry = $x & $y;    // positions where both operands have a 1
    while ($carry) {
        $shifted = $carry << 1;       // a carry affects the next higher bit
        $carry = $sum & $shifted;     // carries produced by this round
        $sum = $sum ^ $shifted;
    }
    return $sum;
}
/**
 * PHP port of the 64-bit MurmurHash64A routine.
 *
 * All multiplications go through multi64() and all right shifts through
 * r_shift(), because PHP integers are signed.
 * NOTE(review): the constants below presume a build with 64-bit integers.
 *
 * @param string $key  Input, hashed byte by byte.
 * @param int    $seed Initial value mixed into the hash state.
 * @return int The hash as a signed integer; it can be negative.
 */
function MurmurHash64A($key, $seed) {
    // 0xc6a4a7935bd1e995 written as a signed 64-bit integer.
    $m = -4132994306676758123;
    $r = 47;
    $len = strlen($key);
    $h = $seed ^ (multi64($len, $m));
    $bytes = getBytes($key);

    // Body: one iteration per complete 8-byte group. `$len / 8` is a float
    // in PHP, so the comparison must be `<=`: with `<` the last group would
    // be skipped whenever $len is an exact multiple of 8.
    for ($i = 0; $i <= ($len / 8) - 1; $i++) {
        // Assemble the group with its first byte as the least significant.
        $k = 0;
        for ($j = 0; $j < 8; $j++) {
            $k = ($k << 8) | $bytes[$i * 8 + 7 - $j];
        }
        $k = multi64($k, $m);
        $k ^= r_shift($k, $r);
        $k = multi64($k, $m);
        $h ^= $k;
        $h = multi64($h, $m);
    }

    // Tail: fold in the remaining 0..7 bytes. Every case deliberately falls
    // through to the next one.
    $data2_index = $len - $len % 8;
    switch ($len & 7) {
        case 7: $h ^= ($bytes[$data2_index + 6]) << 48;
        case 6: $h ^= ($bytes[$data2_index + 5]) << 40;
        case 5: $h ^= ($bytes[$data2_index + 4]) << 32;
        case 4: $h ^= ($bytes[$data2_index + 3]) << 24;
        case 3: $h ^= ($bytes[$data2_index + 2]) << 16;
        case 2: $h ^= ($bytes[$data2_index + 1]) << 8;
        case 1: $h ^= ($bytes[$data2_index + 0]);
                $h = multi64($h, $m);
    }

    // Final mixing of the accumulated state.
    $h ^= r_shift($h, $r);
    $h = multi64($h, $m);
    $h ^= r_shift($h, $r);
    return $h;
}
注意的问题:
代码见上面:
需要注意的问题是,对PHP来说,没有无符号数,所以会出现负数的问题。
对此可以处理如下
// Print the fingerprint in decimal and hexadecimal. A non-positive value is
// shifted up by 2^64 with bcadd() so the decimal output is its unsigned form.
$fp = Fingerprint($model->url);
if ($fp <= 0) {
    echo "HEX fp is :".dechex($fp)."
";
    // 18446744073709551616 is 2^64; bcadd() returns the sum as a string.
    $new = bcadd('18446744073709551616' ,$fp);
    echo "fp is :".$new.'
';
} else {
    echo "fp is :".$fp."
";
    echo "HEX fp is :".dechex($fp)."
";
}
判断计算的fp是正还是负,如果是负数,则通过
bcadd('18446744073709551616' ,$fp) 可以将其转成字符串的表示来正确显示输出。
注意:
for ($i = 0; $i <= ($len / 8) - 1; $i++) { 这里应该为小于等于
之前为小于,在大部分场景下都是正确的。但是当长度为8时则错误。
经查原因为在php中,$len/8是一个浮点数,当长度不为8,比如为9时,$len/8为1.125 ,其减一为0.125,还能保证执行一次循环。
所以是正确的。
而当长度为8时,由于没有了小数部分,导致会少执行一次循环。