lighttpd-1.4.39 : array

最新推荐文章于 2016-05-15 16:25:41 发布

春泥面包

最新推荐文章于 2016-05-15 16:25:41 发布

阅读量526

点赞数

分类专栏： lighttpd 源码学习

本文链接：https://blog.csdn.net/huntinux/article/details/50631319

版权

lighttpd 源码学习专栏收录该内容

12 篇文章 0 订阅

订阅专栏

array的定义

/* Tag stored in the `type` member of a value; enumerators number from 0 upwards. */
typedef enum {
    TYPE_UNSET = 0,
    TYPE_STRING = 1,
    TYPE_COUNT = 2,
    TYPE_ARRAY = 3,
    TYPE_INTEGER = 4,
    TYPE_FASTCGI = 5,
    TYPE_CONFIG = 6
} data_type_t;

/*
 * Member list shared by every value type; a struct "inherits" these fields
 * by expanding the macro inside its body (written as `DATA_UNSET;`, since
 * the expansion carries no trailing semicolon).
 *
 *   type         - tag identifying the concrete value type
 *   key          - key under which the value is stored
 *   is_index_key - 1 when the key was auto-generated from an array index
 *   free         - per-type destructor
 *   insert_dup   - called with (existing, new) when a value with the same
 *                  key is inserted a second time
 *   copy, reset, print - further per-type operations; their exact semantics
 *                  are defined by each implementing type
 */
#define DATA_UNSET \
    data_type_t type; \
    buffer *key; \
    int is_index_key; /* 1 if key is an array index (autogenerated keys) */ \
    struct data_unset *(*copy)(const struct data_unset *src); \
    void (* free)(struct data_unset *p); \
    void (* reset)(struct data_unset *p); \
    int (*insert_dup)(struct data_unset *dst, struct data_unset *src); \
    void (*print)(const struct data_unset *p, int depth)

/*
 * Base "class" of all value types: it contains nothing but the common
 * DATA_UNSET members, so any struct that also begins with DATA_UNSET can be
 * handled through a data_unset pointer.
 */
typedef struct data_unset data_unset;

struct data_unset {
    DATA_UNSET;
};

/*
 * Keyed container: elements live in `data` in insertion order, while
 * `sorted` holds indices into `data` ordered by key so lookups can use a
 * binary-search style probe.
 */
typedef struct {
    data_unset  **data; /* the stored elements, in insertion order */

    size_t *sorted; /* indices into data, ordered by element key */

    size_t used; /* number of elements currently stored in data */
    size_t size; /* number of elements data (and sorted) can currently hold */

    size_t unique_ndx; /* next value used to auto-generate a key for key-less elements */

    /* Used by the lookup as its starting step: the smallest power of two that
     * is not less than used (e.g. used == 5 gives 8). Starts at 1 and is
     * doubled by the insert routine. */
    size_t next_power_of_2;
    int is_weakref; /* data is weakref, don't bother the data */
} array;

模拟面向对象

将数据类型间公共的部分提取了出来，即宏DATA_UNSET。DATA_UNSET相当于基类，其他数据类型会包含DATA_UNSET，也就是“继承了”基类。

array的初始化

/*
 * Allocate an empty array.
 *
 * Every field starts zeroed except next_power_of_2, which starts at 1.
 * Allocation failure is handed to force_assert().
 */
array *array_init(void) {
    array *a = calloc(1, sizeof *a);

    force_assert(a);
    a->next_power_of_2 = 1;
    return a;
}

经过初始化，除了next_power_of_2为1以外，其他部分均为0

array插入元素过程(无重复元素)

需要清楚array中的data和sorted的含义。data是元素的集合（没有排序），而sorted存储的是对data进行排序后的“结果”，只不过sorted中存储的是元素在data中的下标。
所以插入过程的伪代码大致是这样的：

/*
第一步
查找array中有没有要插入的元素str，
如果存在，那么ndx表示该元素在data中的下标,pos表示该元素在sorted中的位置
如果不存在，那么ndx=-1，pos表示最后比较失败的位置（已经在插入位置的附近了）
*/
ndx = array_get_index(array, str, pos);
if (ndx != -1) {
    return 0;
}

/*
第二步
将str放在data的最后面
将str在data中的下标放在sorted中合适的位置
    将str与data[sorted[pos]]进行比较
        如果str较小，那么插入位置就是pos
        否则，插入位置是pos+1
    sorted中插入位置及其之后的元素向后移动一位，
    将str在data中的下标放在sorted[插入位置]中。
*/
array->data[array->used] = str;
cmp = compare(str->key, array->data[array->sorted[pos]]->key);
if (cmp > 0) pos += 1;
sorted[pos .. used-1] --> sorted[pos+1 .. used]   /* 整体向后移动一位 */
sorted[pos] = used;
array->used++;
...

完整源码：

/*
 * Insert `str` into `a`, keeping `a->sorted` ordered by key.
 *
 * - An element with an empty key (or one already flagged is_index_key) gets
 *   the next auto-generated index as its key.
 * - If an element with the same key already exists, the new value is handed
 *   to insert_dup(existing, str) when both have the same type; a type
 *   mismatch triggers SEGFAULT().
 * - Otherwise `str` is appended to `a->data` and its index is spliced into
 *   `a->sorted` at the position reported by array_get_index().
 *
 * Returns 0 after an insert or a duplicate merge, -1 when the array cannot
 * take another entry because indices must fit in an int.
 */
int array_insert_unique(array *a, data_unset *str) {
    int found;
    int slot;
    int where = 0;
    size_t k;

    /* generate a unique index key if necessary */
    if (buffer_is_empty(str->key) || str->is_index_key) {
        buffer_copy_int(str->key, a->unique_ndx++);
        str->is_index_key = 1;
    }

    /* look the key up; `where` receives the last probed position in sorted */
    found = array_get_index(a, CONST_BUF_LEN(str->key), &where);
    if (-1 != found) {
        /* key already present: merge into the existing element */
        if (a->data[found]->type != str->type) {
            SEGFAULT();
        } else {
            str->insert_dup(a->data[found], str);
        }
        return 0;
    }

    /* new key from here on */

    if (a->used+1 > INT_MAX) {
        /* more than INT_MAX entries cannot be handled: see array_get_index() */
        return -1;
    }

    /* make room: first allocation, or grow by 16 slots when full */
    if (0 == a->size || a->used == a->size) {
        if (0 == a->size) {
            a->size   = 16;
            a->data   = malloc(sizeof(*a->data)     * a->size);
            a->sorted = malloc(sizeof(*a->sorted)   * a->size);
        } else {
            a->size  += 16;
            a->data   = realloc(a->data,   sizeof(*a->data)   * a->size);
            a->sorted = realloc(a->sorted, sizeof(*a->sorted) * a->size);
        }
        force_assert(a->data);
        force_assert(a->sorted);

        /* the fresh slots must not contain stale pointers */
        k = a->used;
        while (k < a->size) {
            a->data[k] = NULL;
            k++;
        }
    }

    slot = (int) a->used;

    /* release whatever is still parked in the slot we are about to use */
    if (a->data[slot]) a->data[slot]->free(a->data[slot]);

    a->data[a->used++] = str;

    /* the probe stopped next to the insertion point: step right by one when
     * it ended before the first entry or on an entry with a smaller key */
    if (where != slot) {
        if (where < 0 ||
            buffer_caseless_compare(CONST_BUF_LEN(str->key), CONST_BUF_LEN(a->data[a->sorted[where]]->key)) > 0) {
            where++;
        }
    }

    /* open a gap in sorted by shifting the tail one step to the right */
    if (where != slot) {
        memmove(&a->sorted[where + 1], &a->sorted[where], (size_t)(slot - where) * sizeof(*a->sorted));
    }

    a->sorted[where] = slot;

    /* keep next_power_of_2 ahead of the element count */
    if ((size_t)slot == a->next_power_of_2) a->next_power_of_2 <<= 1;

    return 0;
}

其中的函数array_get_index与传统的二分查找不太一样。
思考这样的问题：如果当前有3个元素，那么最多需要几次比较呢？
答案是3次：此时步长 i 依次为 2、1、0，每一轮最多比较一次。例如查找排在中间的元素时，pos 依次为 2、0、1，直到第 3 次比较才命中。
next_power_of_2是不小于used的“最小的2的n次幂”。例如used=3时，next_power_of_2=4。
可以以此为例，自己在脑海中运行下面的程序。

/*
 * Look up `key` (length `keylen`) in `a` using the sorted index.
 *
 * The probe starts at next_power_of_2 / 2 and moves left or right by a step
 * that is halved every round; the round with step 0 is the last one.
 *
 * Returns the index into a->data of the matching element, or -1 when there
 * is no match (or key is NULL). Unless key is NULL, *rndx (if rndx is
 * non-NULL) receives the final probe position in a->sorted; after a miss
 * this position can be negative or equal to a->used, so the caller has to
 * range-check it.
 */
static int array_get_index(array *a, const char *key, size_t keylen, int *rndx) {
    int found = -1;
    int step;
    int probe;

    if (NULL == key) return -1;

    step = probe = a->next_power_of_2 / 2;

    for (;;) {
        if (probe < 0) {
            /* fell off the left edge: come back */
            probe += step;
        } else if (probe >= (int)a->used) {
            /* beyond the last element: come back */
            probe -= step;
        } else {
            const int cmp = buffer_caseless_compare(key, keylen, CONST_BUF_LEN(a->data[a->sorted[probe]]->key));

            if (0 == cmp) {
                /* hit: translate the sorted position into a data index */
                found = a->sorted[probe];
                break;
            }

            if (cmp < 0) {
                probe -= step;
            } else {
                probe += step;
            }
        }

        if (0 == step) break;
        step >>= 1;
    }

    if (rndx) *rndx = probe;

    return found;
}

春泥面包

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lighttpd-1.4.39 : array

array的定义typedef enum { TYPE_UNSET, TYPE_STRING, TYPE_COUNT, TYPE_ARRAY, TYPE_INTEGER, TYPE_FASTCGI, TYPE_CONFIG } data_type_t;#define DATA_UNSET \ data_type_t type; \ buffer *key; \ int is_
复制链接

扫一扫

专栏目录