内核sort.c 分析_kernel lib sort.c-CSDN博客

本文链接：https://blog.csdn.net/zmrlinux/article/details/60869371

最近，有人面试被问到sort了，总结文档一小篇。

内核数据结构与算法 --sort.c
sort 的声明位置： 内核/include/linux/sort.h
sort 源码的位置： 内核/lib/sort.c
我们来看看sort.h 和 sort.c 的内容。sort 是以库的形式存在的，实现上通过内核导出符号（EXPORT_SYMBOL）提供给其他代码使用：
sort.h 只进行了声明。

#ifndef _LINUX_SORT_H #define _LINUX_SORT_H #include <linux/types.h> void sort(void *base, size_t num, size_t size, int (*cmp)(const void *, const void *), void (*swap)(void *, void *, int)); #endif sort.c * A fast, small, non-recursive O(nlog n) sort for the Linux kernel * * Jan 23 2005 Matt Mackall <mpm@selenic.com> */ #include <linux/types.h> #include <linux/export.h> #include <linux/sort.h> static int alignment_ok(const void *base, int align) { return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || ((unsigned long)base & (align - 1)) == 0; } static void u32_swap(void *a, void *b, int size) { u32 t = *(u32 *)a; *(u32 *)a = *(u32 *)b; *(u32 *)b = t; } static void u64_swap(void *a, void *b, int size) { u64 t = *(u64 *)a; *(u64 *)a = *(u64 *)b; *(u64 *)b = t; } static void generic_swap(void *a, void *b, int size) { char t; do { t = *(char *)a; *(char *)a++ = *(char *)b; *(char *)b++ = t; } while (--size > 0); } /** * sort - sort an array of elements * @base: pointer to data to sort * @num: number of elements * @size: size of each element * @cmp_func: pointer to comparison function * @swap_func: pointer to swap function or NULL * * This function does a heapsort on the given array. You may provide a * swap_func function optimized to your element type. * * Sorting time is O(n log n) both on average and worst-case. While * qsort is about 20% faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make * it less suitable for kernel use. 
*/ void sort(void *base, size_t num, size_t size, int (*cmp_func)(const void *, const void *), void (*swap_func)(void *, void *, int size)) { /* pre-scale counters for performance */ int i = (num/2 - 1) * size, n = num * size, c, r; if (!swap_func) { if (size == 4 && alignment_ok(base, 4)) swap_func = u32_swap; else if (size == 8 && alignment_ok(base, 8)) swap_func = u64_swap; else swap_func = generic_swap; } /* heapify */ for ( ; i >= 0; i -= size) { for (r = i; r * 2 + size < n; r = c) { c = r * 2 + size; if (c < n - size && cmp_func(base + c, base + c + size) < 0) c += size; if (cmp_func(base + r, base + c) >= 0) break; swap_func(base + r, base + c, size); } } /* sort */ for (i = n - size; i > 0; i -= size) { swap_func(base, base + i, size); for (r = 0; r * 2 + size < i; r = c) { c = r * 2 + size; if (c < i - size && cmp_func(base + c, base + c + size) < 0) c += size; if (cmp_func(base + r, base + c) >= 0) break; swap_func(base + r, base + c, size); } } } EXPORT_SYMBOL(sort);

由此可见，Linux 内核使用的是堆排序，平均情况和最坏情况下的时间复杂度都是 O(n log n)。虽然 qsort 平均比堆排序快 20% 左右，但它存在可被利用的 O(n*n) 最坏情况，并且需要额外的内存，所以不太适合内核使用。堆排序是基于二叉堆（一棵完全二叉树）的排序，这里用的是大顶堆：每个节点的值都不大于其父节点的值。内核实现只是用数组来存储这棵树罢了。第一步：建堆（调整堆），即每次调整都是从父节点、左孩子节点、右孩子节点三者中选择最大者跟父节点进行交换(交换之后可能造成被交换的孩子节点不满足堆的性质，因此每次交换之后要重新对被交换的孩子节点进行调整)。有了初始堆之后就可以进行排序了。第二步：将当前无序区的堆顶元素R[1]同该区间的最后一个记录交换，然后将新的无序区调整为新的堆。堆排序其实也是一种选择排序，是一种树形选择排序。只不过直接选择排序中，为了从R[1...n]中选择最大记录，需比较n-1次，然后从R[1...n-1]中选择最大记录需比较n-2次。事实上这n-2次比较中有很多在前面的n-1次比较中已经做过，而树形选择排序恰好利用树形的特点保存了部分前面的比较结果，因此可以减少比较次数。对于n个关键字序列，最坏情况下每个节点需比较log2(n)次，因此其最坏情况下时间复杂度为O(n log n)。堆排序为不稳定排序，不适合记录较少的排序。一个参考博客：http://www.cnblogs.com/jingmoxukong/p/4303826.html