memcache 1.4.24源码分析

最新推荐文章于 2022-04-22 19:37:52 发布

wuhuaiyu

最新推荐文章于 2022-04-22 19:37:52 发布

阅读量563

点赞数

分类专栏：数据库架构

本文链接：https://blog.csdn.net/wuhuaiyu/article/details/50985701

版权

架构同时被 2 个专栏收录

22 篇文章 0 订阅

订阅专栏

数据库

3 篇文章 0 订阅

订阅专栏

内存管理
最底层为slab。

/* powers-of-N allocation structures */

/*
 * Bookkeeping for one slab class: the chunk size it serves, its list of free
 * chunks, and the array of slabs that have been allocated to it.
 */
typedef struct {
    unsigned int size;      /* sizes of items: the chunk size used by this slab class */
    unsigned int perslab;   /* how many items (chunks) per slab */

    void *slots;           /* list of item ptrs */
    unsigned int sl_curr;   /* total free items in list */

    unsigned int slabs;     /* how many slabs (author's note: each slab is 1 MB) were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;  /* index+1 of dying slab, or zero if none */
    size_t requested; /* The number of requested bytes */
} slabclass_t;

slab 分为很多等级（slab class），内存以 slab 为单位分配，每个 slab 内部再划分为很多大小相同的 chunk。第二级 slab 的 chunk 大小是第一级的 factor 倍（默认 1.25），第三级又是第二级的 1.25 倍，依次递增。每个等级可以拥有多个 slab，等级最多 64 级。
slab rebalance在各个slab级别之间根据闲忙进行slab调整。

/**
 * Structure for storing items within memcached.
 *
 * Fixed header followed by variable-length trailing data (see the comments
 * after the `data[]` member for the order of that trailing data).
 * `next`/`prev` link the item into a doubly linked list; `h_next` links it
 * into a hash-bucket chain.
 */
typedef struct _stritem {
    /* Protected by LRU locks */
    struct _stritem *next;
    struct _stritem *prev;
    /* Rest are protected by an item lock */
    struct _stritem *h_next;    /* hash chain next */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time */
    int             nbytes;     /* size of data */
    unsigned short  refcount;
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in (author's note: the slab id also tells which queue the item is on, 0-63 = hot, ...) */
    uint8_t         nkey;       /* key length, w/terminating null and padding */
    /* this odd type prevents type-punning issues when we do
     * the little shuffle to save space when not using CAS. */
    union {
        uint64_t cas;
        char end;
    } data[];
    /* if it_flags & ITEM_CAS we have 8 bytes CAS */
    /* then null-terminated key */
    /* then " flags length\r\n" (no terminating null) */
    /* then data with terminating \r\n (no terminating null; it's binary!) */
} item;

分配 item 时，根据其大小找到 chunk 大小刚好大于等于 item 长度的最小 slab 等级（一个 chunk 只能存储一个 item，多出来的空间会被浪费掉，这样做是为了方便内存回收），并把该 chunk 从对应 slab 等级的空闲链表（freelist）中摘除。item 按所属 slab 等级分别链入 hot、warm、cold、noexp（分配时才放入）四个队列。

/*
 * Crawler pseudo-item.
 *
 * Its fields from `next` through `nkey` have the same names, types and order
 * as the header fields of `item`; only `remaining` is specific to the crawler.
 * NOTE(review): presumably this lets a crawler be linked into the same lists
 * as a real item (the accompanying text says it walks the list from the tail
 * to remove expired data) — confirm against the crawler implementation.
 */
typedef struct {
    struct _stritem *next;
    struct _stritem *prev;
    struct _stritem *h_next;    /* hash chain next */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time */
    int             nbytes;     /* size of data */
    unsigned short  refcount;
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in */
    uint8_t         nkey;       /* key length, w/terminating null and padding */
    uint32_t        remaining;  /* Max keys to crawl per slab per invocation */
} crawler;

crawler 负责全局遍历：它把一个特殊的 crawler item 插入到链表（list）尾部，然后由尾部向前逐个移动，沿途删除已过期的数据。

淘汰策略需要注意：淘汰并不是优先针对已过期的数据。假设一个数据已过期、另一个没过期：如果没过期的数据经常被访问，它不会被淘汰；但没过期而又不经常访问的数据，仍可能先于已过期的数据被淘汰。

为进行快速检索，根据key 进行hash，放入hash表，用链（h_next指针）进行冲突处理。
assoc_maintenance
当 hash 表中的 item 数量达到 hash_items > (hashsize(hashpower) * 3) / 2 的程度时，hash 表进行翻倍扩容。

线程模型

/*
 * An item in the connection queue.
 *
 * Items are chained into a singly linked list through `next`.
 * NOTE(review): the field meanings below are inferred from names and types
 * only — confirm against the code that fills in and consumes these items.
 */
typedef struct conn_queue_item CQ_ITEM;
struct conn_queue_item {
    int               sfd;               /* presumably the socket fd of the connection — confirm */
    enum conn_states  init_state;        /* presumably the state the new connection starts in — confirm */
    int               event_flags;       /* presumably the event flags to register for the fd — confirm */
    int               read_buffer_size;  /* presumably the initial read buffer size — confirm */
    enum network_transport     transport; /* transport type of the connection */
    CQ_ITEM          *next;              /* next item in the queue */
};

/*
 * A connection queue.
 *
 * Linked list of CQ_ITEMs tracked by `head` and `tail` pointers, together
 * with a mutex. NOTE(review): `lock` presumably guards head/tail against
 * concurrent access by producer and consumer threads — confirm.
 */
typedef struct conn_queue CQ;
struct conn_queue {
    CQ_ITEM *head;          /* first item in the queue */
    CQ_ITEM *tail;          /* last item in the queue */
    pthread_mutex_t lock;   /* mutex for this queue */
};
/*
 * Per-thread state: the thread's own libevent base, the two ends of its
 * notify pipe, its statistics, and its queue of new connections to handle.
 */
typedef struct {
    pthread_t thread_id;        /* unique ID of this thread */
    struct event_base *base;    /* libevent handle this thread uses */
    struct event notify_event;  /* listen event for notify pipe */
    int notify_receive_fd;      /* receiving end of notify pipe */
    int notify_send_fd;         /* sending end of notify pipe */
    struct thread_stats stats;  /* Stats generated by this thread */
    struct conn_queue *new_conn_queue; /* queue of new connections to handle */
    cache_t *suffix_cache;      /* suffix cache (author's note: "cache" here is only a small piece of memory private to the thread's processing; it has nothing to do with the cache that memcached itself provides) */
} LIBEVENT_THREAD;

每个 worker 线程拥有一个独立的 libevent 实例（因为 libevent 实例不是线程安全的，不能在线程间共享），同时还有一个与主线程通信的管道，以及一个连接队列。
主线程中，server_socket 调用 dispatch_conn_new 函数接收请求：以轮询方式选出一个 worker 线程，将连接放入该线程的专属队列，并通过 pipe 向其写入 'c' 进行通知。worker 线程随即被唤醒，执行 thread_libevent_process 函数，从队列中读取连接对象，建立线程专属的 connection，然后进入此 connection 的 event_handler 循环。