Redis dict

最新推荐文章于 2024-04-23 23:00:39 发布

lmm2003

最新推荐文章于 2024-04-23 23:00:39 发布

阅读量1.0k

点赞数

分类专栏： Redis 文章标签： redis iterator table 数据结构 dictionary function

Redis 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

dict

HashKey最多有LONG_MAX个桶。

在 Redis 中最基本的三个数据结构是 dict、adlist 和 sds，其中 dict 是 Redis 中最重要的数据结构，其 key-value 的映射关系就是通过 dict 来实现的。dict 的内部实现是 hash table，这个哈希表的大小会依据表中的元素个数动态增加或减少；同时哈希表使用链地址法（拉链法）来解决哈希冲突。具体实现在 dict.h 和 dict.c 文件中。

字典实现中主要用到如下5个结构体：

/*
 * Top-level dictionary object: one dict owns two hash tables so that its
 * contents can be rehashed incrementally from ht[0] into ht[1].
 */
typedef struct dict {
    dictType *type;   /* Type-specific callback table (see dictType). */
    void *privdata;   /* Per-dict private data, used to tell different
                       * dicts apart. */

    /* ht[0] is the table used by preference. When it runs out of room,
     * dictExpand is called and ht[1] is prepared as the target of the
     * incremental rehash. Once rehashing completes, table 0 is released and
     * table 1 is stored in its place. */
    dictht ht[2];

    /* Index in ht[0].table of the next bucket to rehash. Set to -1 when no
     * rehash is in progress. A resize is always followed by a rehash. */
    int rehashidx;

    /* Number of iterators currently running. It is tracked so that rehash
     * steps are skipped while iterators exist (rehashing at that point could
     * lose or duplicate entries); a resize is still possible meanwhile. */
    int iterators;
} dict;

/*
 * A single hash table: an array of bucket heads plus its bookkeeping.
 */
typedef struct dictht {
    dictEntry **table;       /* Address of an array of dictEntry pointers,
                              * one pointer per bucket. */
    unsigned long size;      /* Length of table, i.e. the number of
                              * slots/buckets; normally a power of two. */
    unsigned long sizemask;  /* Always equal to size - 1. */
    unsigned long used;      /* Total number of dictEntry nodes stored in
                              * this table; _dictExpandIfNeeded compares it
                              * with size to decide when to grow. */
} dictht;

 
    /*
     * One key/value node of a hash table bucket.
     */
    typedef struct dictEntry {
        void *key;
        void *val;
        struct dictEntry *next; /* Next entry in the same bucket: collisions
                                 * are resolved by chaining. */
    } dictEntry;
    
 
    /*
     * Holds the methods a dict uses to handle its data, expressed as a table
     * of function pointers. Every callback except hashFunction receives the
     * dict's privdata pointer as its first argument.
     */
    typedef struct dictType {
        unsigned int (*hashFunction)(const void *key);
        void *(*keyDup)(void *privdata, const void *key);
        void *(*valDup)(void *privdata, const void *obj);
        int (*keyCompare)(void *privdata, const void *key1, const void *key2);
        void (*keyDestructor)(void *privdata, void *key);
        void (*valDestructor)(void *privdata, void *obj);
    } dictType;
    
 
    
  
    typedef struct  
    dictIterator  
    { 
    
      
    dict  
    * 
    d; 
    
      
    int  
    table; 
    
      
    int  
    index; 
   

         int safe;//=1 支持多线程安全的iterator,有操作函数保证。//safe操作函数的dictNext会对dict->iterators++, 
   

         /* If safe is set to 1 this is a safe iteartor, that means, you can call 
    
      78  * dictAdd, dictFind, and other functions against the dictionary even while 
    
      79  * iterating. Otherwise it is a non safe iterator, and only dictNext() 
    
      80  * should be called while iterating. */因为safe iterator在使用过程中,只要不释放(iterators--),就不会进行实质的rehash,不会引起调用者预料

              不到的错误。 
    
    dictEntry  
    * 
    entry 
    ,  
    * 
    nextEntry; 
    
    }  
    dictIterator;

redis中用到的整数hash、字符串hash算法如下，做个备份：
 
      /*
       * Thomas Wang's 32 bit Mix Function.
       *
       * Scrambles an integer key through a fixed sequence of shift, add and
       * xor steps and returns the mixed value. All arithmetic is unsigned, so
       * overflow wraps.
       */
      unsigned int dictIntHashFunction(unsigned int key)
      {
          unsigned int h = key;

          h = h + ~(h << 15);
          h = h ^ (h >> 10);
          h = h + (h << 3);
          h = h ^ (h >> 6);
          h = h + ~(h << 11);
          h = h ^ (h >> 16);

          return h;
      }
      
      /* Generic hash function (a popular one from Bernstein). 
      
      * I tested a few and this was the best. */ /* 基本上都用这个哈希函数，为什么？:-) */
      
      /*
       * Bernstein-style string hash: starts from 5381 and folds in each byte
       * as hash = hash * 33 + c.
       *
       * buf: bytes to hash; only the first len bytes are read.
       * len: number of bytes to hash. A value <= 0 hashes nothing and the
       *      seed 5381 is returned, instead of walking past the buffer as a
       *      count-down loop on a negative length would.
       *
       * Returns the accumulated hash; unsigned arithmetic wraps on overflow.
       */
      unsigned int dictGenHashFunction(const unsigned char *buf, int len)
      {
          unsigned int hash = 5381;
          int i;

          for (i = 0; i < len; i++) {
              hash = ((hash << 5) + hash) + buf[i]; /* hash * 33 + c */
          }

          return hash;
      }
     
      ReSize (用于创建或扩展HashTable) 
     
        Note that even when dict_can_resize is set to 0, not all resizes are
   prevented: an hash table is still allowed to grow if the ratio between
   the number of elements and the buckets > dict_force_resize_ratio. */
        static int dict_can_resize = 1; //
        static unsigned int dict_force_resize_ratio = 5; //此时不管是否允许，都要引起resize,否则前台体验下降。后端内存压力增大没有办法。
  
总的说来，当系统中有后台线程在运行时（实际是 fork 出的子进程，Linux 中一切都是进程），不允许自动调整大小，这是为了使类 Linux 系统的 copy-on-write 有更好的性能（没有调整大小，就没有 rehash，这样父进程的 db 没有改变，子进程就不需要真的 copy 数据）。在后台子进程退出后，又会允许 resize。
接下来我们看看自动调整大小的过程。
 什么时候dict做扩容
 在数据插入的时候会调用dictKeyIndex(只在插入数据时调用,存在的话返回-1,表示错误。不存在时，若rehashing,则返回ht[1]中table位置),该方法里会调用_dictExpandIfNeeded，判断dict是否需要rehash，当dict中元素大于桶的个数时，调用dictExpand扩展hash

         /* Expand the hash table if needed */ /* 因为该函数在数据插入时被调用，所以这里根据 hash table 已有的统计信息来判断是否需要 Expand。 */
       
         /*
          * Grow the dictionary's main table when an insertion needs more room.
          *
          * Returns DICT_OK when nothing has to be done, otherwise whatever
          * dictExpand() returns.
          *
          * NOTE(review): this version never consults dict_force_resize_ratio,
          * although the comment on that variable says a table may still grow
          * past that ratio when dict_can_resize is 0 -- confirm against the
          * Redis version this snippet was taken from.
          */
         static int _dictExpandIfNeeded(dict *d)
         {
             unsigned long buckets;
             unsigned long entries;
             unsigned long target;

             /* A rehash is already in progress: leave the tables alone. */
             if (dictIsRehashing(d))
                 return DICT_OK;

             buckets = d->ht[0].size;
             entries = d->ht[0].used;

             /* Empty table: allocate it at the initial size. */
             if (buckets == 0)
                 return dictExpand(d, DICT_HT_INITIAL_SIZE);

             /* Not "full" yet, or resizing is currently disabled. */
             if (entries < buckets || !dict_can_resize)
                 return DICT_OK;

             /* Double the larger of the two counts. */
             target = (buckets > entries) ? buckets : entries;
             return dictExpand(d, target * 2);
         }

ReHash

通常情况下，所有的数据都存放在 dict 的 ht[0] 中，ht[1] 只在 rehash 的时候使用；但在 rehash 过程中，两个 ht 都会被用到。

dict进行rehash的时候，将ht[0]中的所有数据rehash到 ht[1]中。然后将ht[1]赋值给ht[0]，并清空ht[1]。

rehash有2种工作模式

lazy rehashing：在每次对 dict 进行操作的时候执行一个 slot（桶）的 rehash。_dictRehashStep 中也会调用 dictRehash，而 _dictRehashStep 每次仅会把一个桶从 ht[0] rehash 到 ht[1]；但由于 _dictRehashStep 是被 dictGetRandomKey、dictFind、dictGenericDelete、dictAdd 调用的，因此在每次 dict 增删查改时都会被调用，这无疑就加快了 rehash 过程。总共 N 步，每一步迁移一个桶。

active rehashing：每 100ms 里面使用 1ms 时间进行 rehash。serverCron 中，当没有后台子进程时，会调用 incrementallyRehash，最终调用 dictRehashMilliseconds。incrementallyRehash 的时间较长，rehash 的个数也比较多。这里每次执行 1 millisecond 的 rehash 操作；如果未完成 rehash，会在下一个 loop 里面继续执行。

tryResizeHashTables最终被serverCron调用，也就是在每次serverCron循环时都会被调用。

 
   static int  
   serverCron( 
   struct  
   aeEventLoop  
   * 
   eventLoop 
   ,  
   long long  
   id 
   ,  
   void  
   * 
   clientData)  
   { 
   
 
   --- 
   
 
   if ( 
   server 
   . 
   bgsavechildpid  
   ==  
   - 
   1  
   &&  
   server 
   . 
   bgrewritechildpid  
   ==  
   - 
   1)  
   { 
   
          
   if ( 
   !( 
   loops  
   %  
   10))  
   tryResizeHashTables(); 
   
          
   if ( 
   server 
   . 
   activerehashing)  
   incrementallyRehash(); 
   
      
   } 
   
 
   --- 
  

}

接下来看下rehash，主要在dictRehash中完成。先看下什么时候进行rehash。

在如上的 serverCron 中，当没有后台子进程时，会调用 incrementallyRehash，最终调用 dictRehash。incrementallyRehash 的时间较长，rehash 的个数也比较多。

另外在_dictRehashStep，也会调用dictRehash，而_dictRehashStep每次仅会rehash一个值从ht[0]到 ht[1](够缓慢的)，但由于_dictRehashStep是被dictGetRandomKey、dictFind、 dictGenericDelete、dictAdd调用的，因此在每次dict增删查改时都会被调用，这无疑就加快了rehash过程。

我们再来看看rehash过程。dictRehash每次增量rehash n个元素，由于在自动调整大小时已设置好了ht[1]的大小，因此rehash的主要过程就是遍历ht[0]，取得key，然后将该key按ht[1]的桶的大小重新rehash，并在rehash完后将ht[0]指向ht[1],然后将ht[1]清空。

lmm2003

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Redis dict

dictHashKey最多有LONG_MAX个桶。在redis中最基本的三个数据结构是dict 、adlist和sds，其中dict是redis中最重要的数据结构了，其key-value的映射关系就是通过dict来实现的，dict的内部实现是hash table，这个哈希表的大小是动态增加或减少的，主要是依据哈希表中的元素个数；同时哈希表适用链接法来解决哈希冲突的，具体实现在dic
复制链接

扫一扫

专栏目录