为啥要用位运算代替取模呢

最新推荐文章于 2024-06-24 15:48:12 发布

varyall

最新推荐文章于 2024-06-24 15:48:12 发布

阅读量6.4k

点赞数 2

分类专栏： map

map 专栏收录该内容

15 篇文章 2 订阅

订阅专栏

在hash中查找key的时候，经常会发现用&取代%，先看两段代码吧，

JDK6中的HashMap中的indexFor方法：

    Java代码   
    
  
 /** 
  * Returns index for hash code h. 
  */  
 static int indexFor(int h, int length) {  
     return h & (length-1);  
 }  

Redis2.4中的代码段：

    C代码   
    
  
 n.size = realsize;  
 n.sizemask = realsize-1;  
 //此处略去xxx行  
 hile(de) {  
         unsigned int h;  
   
         nextde = de->next;  
         /* Get the index in the new hash table */  
         h = dictHashKey(d, de->key) & d->ht[1].sizemask;  
         de->next = d->ht[1].table[h];  
         d->ht[1].table[h] = de;  
         d->ht[0].used--;  
         d->ht[1].used++;  
         de = nextde;  
     }  

大家可以看到a%b取模的形式都被替换成了a&(b-1) ，当hashtable的长度是2的幂的情况下（疏忽，一开始没写），这两者是等价的，那为什么要用后者呢？

另一方面，为什么hashtable的长度最好要是2的n次方呢？这个不在本次讨论范围之列，原因简单说一下就是：1、分布更均匀；2、碰撞几率更小。详情自己思考。JDK中的HashMap就会在初始化时保证这一点：

    Java代码   
    
  
 public HashMap(int initialCapacity, float loadFactor) {  
     if (initialCapacity < 0)  
         throw new IllegalArgumentException("Illegal initial capacity: " +  
                                            initialCapacity);  
     if (initialCapacity > MAXIMUM_CAPACITY)  
         initialCapacity = MAXIMUM_CAPACITY;  
     if (loadFactor <= 0 || Float.isNaN(loadFactor))  
         throw new IllegalArgumentException("Illegal load factor: " +  
                                            loadFactor);  
   
     // Find a power of 2 >= initialCapacity  
     int capacity = 1;  
     while (capacity < initialCapacity)  
         capacity <<= 1;  
   
     this.loadFactor = loadFactor;  
     threshold = (int)(capacity * loadFactor);  
     table = new Entry[capacity];  
     init();  
 }  

redis中也有类似的保证：

    C代码   
    
  
 /* Our hash table capacity is a power of two.
  *
  * Starts from DICT_HT_INITIAL_SIZE and doubles it until it is >= size,
  * then returns that value.
  *
  * Requests of LONG_MAX or more are capped at LONG_MAX + 1. Unlike
  * LONG_MAX, that value is itself a power of two, so subtracting one from
  * the result still yields an all-ones index mask. */
 static unsigned long _dictNextPower(unsigned long size)
 {
     unsigned long i = DICT_HT_INITIAL_SIZE;

     if (size >= LONG_MAX) return LONG_MAX + 1LU;
     while (i < size) {
         i *= 2;
     }
     return i;
 }

言归正传，大家都知道位运算的效率最高，这也是&取代%的原因，来看个程序：

    C代码   
    
  
 /*
  * Minimal program for comparing the machine code generated for masking,
  * a & (b-1), with the code generated for the remainder operator, a % b.
  *
  * b is a power of two, so for the non-negative a used here both
  * expressions evaluate to the same value (0x111). The results are
  * deliberately unused; build without optimization, otherwise the
  * compiler may remove both computations. Always returns 0.
  */
 int main(int argc, char *argv[])
 {
     int a = 0x111;
     int b = 0x200;  /* power of two: a & (b-1) == a % b */
     int c = 0;
     int d = 0;

     c = a & (b-1);
     d = a % b;

     return 0;
 }

看反汇编的结果：

    反汇编代码   
    
  
 13:       c = a & (b-1);  
 00401044   mov         eax,dword ptr [ebp-8]  
 00401047   sub         eax,1  
 0040104A   mov         ecx,dword ptr [ebp-4]  
 0040104D   and         ecx,eax  
 0040104F   mov         dword ptr [ebp-0Ch],ecx  
 14:       d = a % b;  
 00401052   mov         eax,dword ptr [ebp-4]  
 00401055   cdq  
 00401056   idiv        eax,dword ptr [ebp-8]  
 00401059   mov         dword ptr [ebp-10h],edx  

可以看到，& 操作用了：3 mov + 1 and + 1 sub；% 操作用了：2 mov + 1 cdq + 1 idiv

我们可以查阅《Coding_ASM_-_Intel_Instruction_Set_Codes_and_Cycles》资料，发现前者只需5个CPU周期，而后者至少需要26个CPU周期（注意，是最少！！！），效率差距显而易见。所以以后自己在写的时候，也可以使用前者的写法——前提是除数b为2的幂且a非负，否则a&(b-1)与a%b的结果并不相同。

在hash中查找key的时候，经常会发现用&取代%，先看两段代码吧，

JDK6中的HashMap中的indexFor方法：

    Java代码   
    
  
 /** 
  * Returns index for hash code h. 
  */  
 static int indexFor(int h, int length) {  
     return h & (length-1);  
 }  

Redis2.4中的代码段：

    C代码   
    
  
 n.size = realsize;  
 n.sizemask = realsize-1;  
 //此处略去xxx行  
 hile(de) {  
         unsigned int h;  
   
         nextde = de->next;  
         /* Get the index in the new hash table */  
         h = dictHashKey(d, de->key) & d->ht[1].sizemask;  
         de->next = d->ht[1].table[h];  
         d->ht[1].table[h] = de;  
         d->ht[0].used--;  
         d->ht[1].used++;  
         de = nextde;  
     }  

大家可以看到a%b取模的形式都被替换成了a&(b-1) ，当hashtable的长度是2的幂的情况下（疏忽，一开始没写），这两者是等价的，那为什么要用后者呢？

    Java代码   
    
  
 public HashMap(int initialCapacity, float loadFactor) {  
     if (initialCapacity < 0)  
         throw new IllegalArgumentException("Illegal initial capacity: " +  
                                            initialCapacity);  
     if (initialCapacity > MAXIMUM_CAPACITY)  
         initialCapacity = MAXIMUM_CAPACITY;  
     if (loadFactor <= 0 || Float.isNaN(loadFactor))  
         throw new IllegalArgumentException("Illegal load factor: " +  
                                            loadFactor);  
   
     // Find a power of 2 >= initialCapacity  
     int capacity = 1;  
     while (capacity < initialCapacity)  
         capacity <<= 1;  
   
     this.loadFactor = loadFactor;  
     threshold = (int)(capacity * loadFactor);  
     table = new Entry[capacity];  
     init();  
 }  

redis中也有类似的保证：

    C代码   
    
  
 /* Our hash table capacity is a power of two.
  *
  * Starts from DICT_HT_INITIAL_SIZE and doubles it until it is >= size,
  * then returns that value.
  *
  * Requests of LONG_MAX or more are capped at LONG_MAX + 1. Unlike
  * LONG_MAX, that value is itself a power of two, so subtracting one from
  * the result still yields an all-ones index mask. */
 static unsigned long _dictNextPower(unsigned long size)
 {
     unsigned long i = DICT_HT_INITIAL_SIZE;

     if (size >= LONG_MAX) return LONG_MAX + 1LU;
     while (i < size) {
         i *= 2;
     }
     return i;
 }

言归正传，大家都知道位运算的效率最高，这也是&取代%的原因，来看个程序：

    C代码   
    
  
 /*
  * Minimal program for comparing the machine code generated for masking,
  * a & (b-1), with the code generated for the remainder operator, a % b.
  *
  * b is a power of two, so for the non-negative a used here both
  * expressions evaluate to the same value (0x111). The results are
  * deliberately unused; build without optimization, otherwise the
  * compiler may remove both computations. Always returns 0.
  */
 int main(int argc, char *argv[])
 {
     int a = 0x111;
     int b = 0x200;  /* power of two: a & (b-1) == a % b */
     int c = 0;
     int d = 0;

     c = a & (b-1);
     d = a % b;

     return 0;
 }

看反汇编的结果：

    反汇编代码   
    
  
 13:       c = a & (b-1);  
 00401044   mov         eax,dword ptr [ebp-8]  
 00401047   sub         eax,1  
 0040104A   mov         ecx,dword ptr [ebp-4]  
 0040104D   and         ecx,eax  
 0040104F   mov         dword ptr [ebp-0Ch],ecx  
 14:       d = a % b;  
 00401052   mov         eax,dword ptr [ebp-4]  
 00401055   cdq  
 00401056   idiv        eax,dword ptr [ebp-8]  
 00401059   mov         dword ptr [ebp-10h],edx  

可以看到，& 操作用了：3 mov + 1 and + 1 sub；% 操作用了：2 mov + 1 cdq + 1 idiv

http://crazyjvm.iteye.com/blog/1725508

在hash中查找key的时候，经常会发现用&取代%，先看两段代码吧，

JDK6中的HashMap中的indexFor方法：

     Java代码   
     
   
 /** 
  * Returns index for hash code h. 
  */  
 static int indexFor(int h, int length) {  
     return h & (length-1);  
 }  

Redis2.4中的代码段：

     C代码   
     
   
 n.size = realsize;  
 n.sizemask = realsize-1;  
 //此处略去xxx行  
 hile(de) {  
         unsigned int h;  
   
         nextde = de->next;  
         /* Get the index in the new hash table */  
         h = dictHashKey(d, de->key) & d->ht[1].sizemask;  
         de->next = d->ht[1].table[h];  
         d->ht[1].table[h] = de;  
         d->ht[0].used--;  
         d->ht[1].used++;  
         de = nextde;  
     }  

大家可以看到a%b取模的形式都被替换成了a&(b-1) ，当hashtable的长度是2的幂的情况下（疏忽，一开始没写），这两者是等价的，那为什么要用后者呢？

     Java代码   
     
   
 public HashMap(int initialCapacity, float loadFactor) {  
     if (initialCapacity < 0)  
         throw new IllegalArgumentException("Illegal initial capacity: " +  
                                            initialCapacity);  
     if (initialCapacity > MAXIMUM_CAPACITY)  
         initialCapacity = MAXIMUM_CAPACITY;  
     if (loadFactor <= 0 || Float.isNaN(loadFactor))  
         throw new IllegalArgumentException("Illegal load factor: " +  
                                            loadFactor);  
   
     // Find a power of 2 >= initialCapacity  
     int capacity = 1;  
     while (capacity < initialCapacity)  
         capacity <<= 1;  
   
     this.loadFactor = loadFactor;  
     threshold = (int)(capacity * loadFactor);  
     table = new Entry[capacity];  
     init();  
 }  

redis中也有类似的保证：

     C代码   
     
   
 /* Our hash table capacity is a power of two.
  *
  * Starts from DICT_HT_INITIAL_SIZE and doubles it until it is >= size,
  * then returns that value.
  *
  * Requests of LONG_MAX or more are capped at LONG_MAX + 1. Unlike
  * LONG_MAX, that value is itself a power of two, so subtracting one from
  * the result still yields an all-ones index mask. */
 static unsigned long _dictNextPower(unsigned long size)
 {
     unsigned long i = DICT_HT_INITIAL_SIZE;

     if (size >= LONG_MAX) return LONG_MAX + 1LU;
     while (i < size) {
         i *= 2;
     }
     return i;
 }

言归正传，大家都知道位运算的效率最高，这也是&取代%的原因，来看个程序：

     C代码   
     
   
 /*
  * Minimal program for comparing the machine code generated for masking,
  * a & (b-1), with the code generated for the remainder operator, a % b.
  *
  * b is a power of two, so for the non-negative a used here both
  * expressions evaluate to the same value (0x111). The results are
  * deliberately unused; build without optimization, otherwise the
  * compiler may remove both computations. Always returns 0.
  */
 int main(int argc, char *argv[])
 {
     int a = 0x111;
     int b = 0x200;  /* power of two: a & (b-1) == a % b */
     int c = 0;
     int d = 0;

     c = a & (b-1);
     d = a % b;

     return 0;
 }

看反汇编的结果：

     反汇编代码   
     
   
 13:       c = a & (b-1);  
 00401044   mov         eax,dword ptr [ebp-8]  
 00401047   sub         eax,1  
 0040104A   mov         ecx,dword ptr [ebp-4]  
 0040104D   and         ecx,eax  
 0040104F   mov         dword ptr [ebp-0Ch],ecx  
 14:       d = a % b;  
 00401052   mov         eax,dword ptr [ebp-4]  
 00401055   cdq  
 00401056   idiv        eax,dword ptr [ebp-8]  
 00401059   mov         dword ptr [ebp-10h],edx  

可以看到，& 操作用了：3 mov + 1 and + 1 sub；% 操作用了：2 mov + 1 cdq + 1 idiv

varyall

关注

2
点赞
踩
11

收藏

觉得还不错? 一键收藏
0
评论
为啥要用位运算代替取模呢

在hash中查找key的时候，经常会发现用&取代%，先看两段代码吧， JDK6中的HashMap中的indexFor方法：Java代码 /** * Returns index for hash code h. */ static int indexFor(int h, int length) { return h &
复制链接

扫一扫

专栏目录