Ceph 学习——CRUSH算法及源码分析（二）

最新推荐文章于 2023-03-14 15:27:49 发布

SEU_PAN

最新推荐文章于 2023-03-14 15:27:49 发布

阅读量1.4k

点赞数 3

分类专栏：Ceph源码分析、分布式存储　文章标签：Ceph源码、Ceph学习、CRUSH源码解析、分布式存储、CRUSH算法

本文链接：https://blog.csdn.net/csnd_pan/article/details/78675198

版权

前一章介绍了CRUSH算法的基本原理和一些基本的数据结构，这一节将介绍CRUSH的源码实现，主要是对实现算法的具体函数的介绍。
回顾 Ceph 学习——CRUSH算法及源码分析（一）
CRUSH 相关源代码位于源码目录 ceph/src/crush 中。（这个编辑器不能上传附件的吗？知道的朋友教下新手小白怎么上传附件。）

相关数据结构
CRUSH算法实现

相关数据结构

CRUSH 算法相关的数据结构主要有三个，分别是 crush_map、crush_bucket、crush_rule。数据结构的定义主要在文件 crush.h、crush.c 中，它们定义了 crush_map 相关的数据结构，以及相关的 destroy 方法（如 crush_destroy()、crush_destroy_rule()、crush_destroy_bucket() 等）。这部分代码实现不多，主要看看相关数据结构的定义即可。打开 crush.h。

crush_map

首先看看crush_map的结构体定义

/*
 * Top-level CRUSH map: the table of buckets, the table of rules, and the
 * sizes of both tables. Only the leading fields are shown in this excerpt.
 */
struct crush_map {
        /*! An array of crush_bucket pointers of size __max_buckets__.
         * An element of the array may be NULL if the bucket was removed with
         * crush_remove_bucket(). The buckets must be added with crush_add_bucket().
         * The bucket found at __buckets[i]__ must have a crush_bucket.id == -1-i.
         */
    struct crush_bucket **buckets; /* array of pointers (not a 2-D array) holding every bucket of the map */
        /*! An array of crush_rule pointers of size __max_rules__.
         * An element of the array may be NULL if the rule was removed (there is
         * no API to do so but there may be one in the future). The rules must be added
         * with crush_add_rule().
         */
    struct crush_rule **rules; /* array of pointers holding every crush_rule of the map */

    __s32 max_buckets; /*!< the size of __buckets__ */
    __u32 max_rules; /*!< the size of __rules__ */
        /*! The value of the highest item stored in the crush_map + 1
         */
    __s32 max_devices;
    /* the remaining fields are elided in this excerpt */
    ...
    ...
    ...

};

crush_bucket

首先看看crush_bucket的结构体定义

/*
 * A CRUSH bucket: a node of the map that owns an array of children, each of
 * which is either another bucket or an item.
 */
struct crush_bucket {
    __s32 id;        /*!< bucket identifier, < 0 and unique within a crush_map */
    __u16 type;      /*!< > 0 bucket type, defined by the caller (the article notes that type 0 denotes an OSD device, i.e. not a bucket) */
    __u8 alg;        /*!< the item selection ::crush_algorithm used by this bucket */
        /*! @cond INTERNAL */
    __u8 hash;       /* which hash function to use, CRUSH_HASH_* */
    /*! @endcond */
    __u32 weight;    /*!< 16.16 fixed point cumulated children weight */
    __u32 size;      /*!< size of the __items__ array, i.e. the number of children */
    /*
     * NOTE(review): the original annotation described these values as indices
     * into the crush_map buckets array. They look like ids instead: given the
     * rule that buckets[i] has id == -1-i, a child bucket with id b would be
     * found at buckets[-1-b] -- confirm against the mapper code.
     */
    __s32 *items;    /*!< array of children: < 0 are buckets, >= 0 items */
};

crush_rule

/*
 * Opcodes for crush_rule_step.op. Each value selects what one step of a
 * rule does; the meaning of arg1/arg2 depends on the opcode. Value 5 is
 * not assigned in this enum.
 */
enum crush_opcodes {
        /*! do nothing (first of the rule step opcodes)
         */
    CRUSH_RULE_NOOP = 0,
    CRUSH_RULE_TAKE = 1,          /* arg1 = value to start with */
    CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
                      /* arg2 = type */
    CRUSH_RULE_CHOOSE_INDEP = 3,  /* same */
    CRUSH_RULE_EMIT = 4,          /* no args */
    CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
    CRUSH_RULE_CHOOSELEAF_INDEP = 7,

    CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
    CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
    CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
    CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
    CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
    CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
};

 /*
 * CRUSH uses user-defined "rules" to describe how inputs should be
 * mapped to devices.  A rule consists of sequence of steps to perform
 * to generate the set of output devices.
 */
/* One step of a rule: an opcode plus up to two opcode-specific arguments. */
struct crush_rule_step {
    __u32 op;    /* opcode of this step (one of enum crush_opcodes) */
    __s32 arg1;  /* take step: id of the bucket to start from; choose step: number of items to pick */
    __s32 arg2;  /* choose step: type of the bucket to pick */
};

/*
 * The rule mask is used to describe what the rule is intended for.
 * Given a ruleset and size of output set, we search through the
 * rule list for a matching rule_mask.
 */
/* Matching criteria of a rule: which ruleset it belongs to and what it is for. */
struct crush_rule_mask {
    __u8 ruleset;   /* number of the ruleset this rule belongs to */
    __u8 type;      /* rule type */
    __u8 min_size;  /* presumably the smallest output set size the rule applies to -- confirm */
    __u8 max_size;  /* presumably the largest output set size the rule applies to -- confirm */
};

/*
 * A CRUSH rule: a mask describing what the rule is intended for, followed
 * by __len__ steps stored directly after the fixed-size header.
 */
struct crush_rule {
    __u32 len;                      /* number of entries in steps[] */
    struct crush_rule_mask mask;    /* ruleset settings this rule matches */
    /*
     * C99 flexible array member. This replaces the GNU zero-length array
     * form steps[0]; layout and sizeof(struct crush_rule) are unchanged,
     * but the declaration is now standard C.
     */
    struct crush_rule_step steps[];
};

相关数据结构的构造（生成实例）

这一部分主要在源代码的 builder.c 和 builder.h 中，主要是将上面的数据结构生成对应的实例（填充数据）。
这一块不详细介绍，同样看看.h文件了解有哪些函数即可。

/** @ingroup API
 *
 * Allocate a crush_map with __malloc(3)__ and initialize it. The
 * caller is responsible for deallocating the crush_map with
 * crush_destroy().
 *
 * The content of the allocated crush_map is set with
 * set_optimal_crush_map(). The caller is responsible for setting each
 * tunable in the __crush_map__ for backward compatibility or mapping
 * stability.
 *
 * The function takes no arguments; it is declared with an explicit
 * (void) parameter list so that the declaration is a real prototype
 * and calls passing arguments are rejected at compile time.
 *
 * @returns a pointer to the newly created crush_map or NULL
 */
extern struct crush_map *crush_create(void);


/* rules */
/** @ingroup API
 *
 * Create a new crush_rule for __len__ steps, tagged with the given
 * ruleset, type and size limits.
 *
 * If __malloc(3)__ fails, return NULL.
 *
 * @param len number of steps in the rule
 * @param ruleset user defined value
 * @param type user defined value
 * @param minsize minimum number of items the rule can map
 * @param maxsize maximum number of items the rule can map
 *
 * @returns a pointer to the newly created rule or NULL
 */
extern struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize);
/** @ingroup API
 *
 * Set the step at index __pos__ of __rule__ to the operation __op__
 * with the arguments __arg1__ and __arg2__.
 *
 * @param rule the rule in which the step is inserted
 * @param pos the zero based step index
 * @param op one of __CRUSH_RULE_NOOP__, __CRUSH_RULE_TAKE__, __CRUSH_RULE_CHOOSE_FIRSTN__, __CRUSH_RULE_CHOOSE_INDEP__, __CRUSH_RULE_CHOOSELEAF_FIRSTN__, __CRUSH_RULE_CHOOSELEAF_INDEP__, __CRUSH_RULE_SET_CHOOSE_TRIES__, __CRUSH_RULE_SET_CHOOSELEAF_TRIES__ or __CRUSH_RULE_EMIT__
 * @param arg1 first argument for __op__
 * @param arg2 second argument for __op__
 */
extern void crush_rule_set_step(struct crush_rule *rule, int pos, int op, int arg1, int arg2);
/** &