TimSort

最新推荐文章于 2024-01-07 06:33:33 发布

等一轮明月丶

最新推荐文章于 2024-01-07 06:33:33 发布

阅读量212

点赞数

文章标签：算法

本文链接：https://blog.csdn.net/weixin_44831838/article/details/117329869

版权

关于Timsort原理部分可以看一下

b站视频讲解

github别人整理的文档

今天分析一下java的timsort相关的实现。

java中的Arrays工具类的静态方法sort():

public static void sort(Object[] a, int fromIndex, int toIndex) {
        /*
         * Validate the requested range before touching the array. Per the
         * original note, this may raise IllegalArgumentException
         * (fromIndex > toIndex) or ArrayIndexOutOfBoundsException.
         */
        rangeCheck(a.length, fromIndex, toIndex);

        // Default path: hand the range to TimSort, with no caller-supplied
        // workspace (null array, zero base, zero length).
        if (!LegacyMergeSort.userRequested) {
            ComparableTimSort.sort(a, fromIndex, toIndex, null, 0, 0);
            return;
        }

        // The legacy merge sort is used only when explicitly requested.
        legacyMergeSort(a, fromIndex, toIndex);
    }

ComparableTimSort直接继承Object类，并有如下的域和方法：

TimSort类信息

直接看sort方法：

static void sort(Object[] a, int lo, int hi, Object[] work, int workBase, int workLen) {
        // Sanity-check the requested range [lo, hi).
        assert a != null && lo >= 0 && lo <= hi && hi <= a.length;

        int remaining = hi - lo;
        if (remaining < 2) {
            return;  // Ranges of size 0 and 1 are always sorted
        }

        /*
         * Small ranges (fewer than MIN_MERGE elements; the original note gives
         * MIN_MERGE as 32) skip merging entirely: find the initial run, make
         * it ascending, then binary-insertion-sort the rest of the range
         * into it.
         */
        if (remaining < MIN_MERGE) {
            int sortedPrefix = countRunAndMakeAscending(a, lo, hi);
            binarySort(a, lo, hi, lo + sortedPrefix);
            return;
        }

        /*
         * Full TimSort: walk the range once from left to right, finding
         * natural runs, extending short ones to minRun elements, and merging
         * runs to maintain the stack invariant.
         */
        ComparableTimSort ts = new ComparableTimSort(a, work, workBase, workLen);
        final int minRun = minRunLength(remaining);
        int cursor = lo;  // start of the next run to identify
        do {
            // Identify the next ascending (or strictly descending, then
            // reversed) run starting at cursor.
            int runLen = countRunAndMakeAscending(a, cursor, hi);

            // A run shorter than minRun is extended with binary insertion
            // sort, up to minRun elements or whatever is left of the range.
            if (runLen < minRun) {
                int force = Math.min(remaining, minRun);
                binarySort(a, cursor, cursor + force, cursor + runLen);
                runLen = force;
            }

            // Record the run (base and length) on the pending-run stack,
            // then merge stacked runs as needed to restore the invariant.
            ts.pushRun(cursor, runLen);
            ts.mergeCollapse();

            // Advance past the run just processed.
            cursor += runLen;
            remaining -= runLen;
        } while (remaining != 0);

        // Merge all remaining runs to complete the sort.
        assert cursor == hi;
        ts.mergeForceCollapse();
        // Exactly one run must remain; otherwise AssertionError when enabled.
        assert ts.stackSize == 1;
    }

下面看一下合并run的操作

/**
 * Examines the stack of pending runs and merges adjacent runs until the
 * stack invariants are re-established:
 *
 *   1. runLen[i - 2] > runLen[i - 1] + runLen[i]
 *   2. runLen[i - 1] > runLen[i]
 *
 * Keeping run lengths decreasing this way means adjacent runs being merged
 * are of similar size, which keeps merging efficient.
 *
 * The first invariant is checked for the top three runs and also for the
 * three runs one level deeper (runLen[n-2], runLen[n-1], runLen[n]).
 * Checking only the top three is the known TimSort defect: a merge can
 * leave a violation further down the stack, so run lengths grow more
 * slowly than the stack sizing assumes and the run stack can overflow on
 * crafted inputs.
 */
private void mergeCollapse() {
        while (stackSize > 1) {
            int n = stackSize - 2;
            if (n > 0 && runLen[n - 1] <= runLen[n] + runLen[n + 1]
                    || n > 1 && runLen[n - 2] <= runLen[n] + runLen[n - 1]) {
                // Invariant 1 is violated: merge the middle run with the
                // smaller of its two neighbours.
                if (runLen[n - 1] < runLen[n + 1])
                    n--;
                mergeAt(n);
            } else if (runLen[n] <= runLen[n + 1]) {
                // Invariant 2 is violated: merge the top two runs.
                mergeAt(n);
            } else {
                break; // Invariant is established
            }
        }
    }

其中调用了mergeAt函数

/**
 * Merges the two runs at stack indices i and i + 1. Run i must be the
 * penultimate or antepenultimate run on the stack, i.e. i must equal
 * stackSize - 2 or stackSize - 3.
 *
 * @param i stack index of the first of the two runs to merge
 */
private void mergeAt(int i) {
        assert stackSize >= 2;
        assert i >= 0;
        assert i == stackSize - 2 || i == stackSize - 3;

        final int next = i + 1;
        int leftBase = runBase[i];
        int leftLen = runLen[i];
        final int rightBase = runBase[next];
        int rightLen = runLen[next];
        assert leftLen > 0 && rightLen > 0;
        assert leftBase + leftLen == rightBase;  // the runs must be adjacent

        /*
         * Store the combined length in slot i. When i is the third-from-last
         * run, the last run (not part of this merge) slides down one slot.
         * Either way the stack shrinks by one entry.
         */
        runLen[i] = leftLen + rightLen;
        if (i == stackSize - 3) {
            runBase[next] = runBase[i + 2];
            runLen[next] = runLen[i + 2];
        }
        stackSize--;

        /*
         * Locate where the first element of the right run belongs in the
         * left run. Everything before that point is already in place and
         * can be skipped.
         */
        int skipped = gallopRight((Comparable<Object>) a[rightBase], a, leftBase, leftLen, 0);
        assert skipped >= 0;
        leftBase += skipped;
        leftLen -= skipped;
        if (leftLen == 0) {
            return;
        }

        /*
         * Locate where the last element of the left run belongs in the
         * right run. Everything after that point is already in place and
         * can be skipped.
         */
        rightLen = gallopLeft((Comparable<Object>) a[leftBase + leftLen - 1], a,
                rightBase, rightLen, rightLen - 1);
        assert rightLen >= 0;
        if (rightLen == 0) {
            return;
        }

        // Merge what is left, using a tmp array with min(len1, len2) elements.
        if (leftLen <= rightLen) {
            mergeLo(leftBase, leftLen, rightBase, rightLen);
        } else {
            mergeHi(leftBase, leftLen, rightBase, rightLen);
        }
    }

其中在确定合并的起止位置时使用了gallop（飞奔）模式进行查找（这部分的代码明天再看）

分为gallopLeft和gallopRight；

下面的函数定位应该将key插入到数组a中从base开始，长度为len的子数组中的哪个位置；如果数组中存在相等的值，则返回最左的那个index（翻译源码注释），hint表示在a[base+hint]位置附近开始搜索key；

算法思路如下：（其实整理为一个流程图更好）

1.初始化变量lastOfs=0、ofs=1；判断key是否大于a[base+hint]，大于的话跳转至2（这一步就可以确定接下来的搜索范围是[base+hint, base+len)），否则跳转至4（这一步就可以确定接下来的搜索范围是[base, base+hint]）

2.判断key是否大于a[base+hint+ofs]，若大于则

lastofs=ofs;
ofs=(ofs<<1)+1;//指数变化

并跳转至2（重复本步骤）；若不大于（即小于等于），或者ofs已经超出范围，则跳转至3

3.进行赋值

lastOfs += hint;
ofs += hint;

此时可以确定这样的一个关系，a[base+lastOfs] < key <= a[base+ofs]；跳转至6；

4.判断key是否小于等于a[base+hint-ofs]，若小于等于则

lastOfs = ofs;
ofs = (ofs << 1) + 1;

并跳转至4（重复本步骤）；若大于，或者ofs已经超出范围，则跳转至5

5.进行赋值

int tmp = lastOfs;//先保存旧的lastOfs
lastOfs = hint - ofs;
ofs = hint - tmp;

此时可以确定这样的一个关系，a[base+lastOfs] < key <= a[base+ofs]；跳转至6；

6.在(lastOfs, ofs]范围内利用二分查找确定key应该插入的位置；

下面的是源码

private static int gallopLeft(Comparable<Object> key, Object[] a,
            int base, int len, int hint) {
        /*
         * Finds the position at which key should be inserted into the sorted
         * range a[base .. base+len-1]. If the range contains elements equal
         * to key, the index of the leftmost one is returned. The result k is
         * relative to base and satisfies
         * a[base + k - 1] < key <= a[base + k], with 0 <= k <= len.
         * The search starts near a[base + hint].
         */
        assert len > 0 && hint >= 0 && hint < len;

        final int pivot = base + hint;
        int prevStep = 0;
        int step = 1;
        int low;   // exclusive lower bound of the bracket, relative to base
        int high;  // inclusive upper bound of the bracket, relative to base

        if (key.compareTo(a[pivot]) > 0) {
            // Gallop right until a[pivot + prevStep] < key <= a[pivot + step].
            final int limit = len - hint;
            while (step < limit && key.compareTo(a[pivot + step]) > 0) {
                prevStep = step;
                step = (step << 1) + 1;
                if (step <= 0) {  // int overflow
                    step = limit;
                }
            }
            if (step > limit) {
                step = limit;
            }

            // Convert the steps into offsets relative to base.
            low = prevStep + hint;
            high = step + hint;
        } else {
            // Here key <= a[pivot]. Gallop left until
            // a[pivot - step] < key <= a[pivot - prevStep].
            final int limit = hint + 1;
            while (step < limit && key.compareTo(a[pivot - step]) <= 0) {
                prevStep = step;
                step = (step << 1) + 1;
                if (step <= 0) {  // int overflow
                    step = limit;
                }
            }
            if (step > limit) {
                step = limit;
            }

            // Convert the steps into offsets relative to base; going left
            // swaps which step is the lower bound.
            low = hint - step;
            high = hint - prevStep;
        }
        assert -1 <= low && low < high && high <= len;

        /*
         * Now a[base + low] < key <= a[base + high]. Binary-search the
         * bracket, keeping a[base + low - 1] < key <= a[base + high].
         */
        low++;
        while (low < high) {
            final int mid = low + ((high - low) >>> 1);
            if (key.compareTo(a[base + mid]) <= 0) {
                high = mid;       // key <= a[base + mid]
            } else {
                low = mid + 1;    // a[base + mid] < key
            }
        }
        assert low == high;       // so a[base + high - 1] < key <= a[base + high]
        return high;
    }

等一轮明月丶

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
TimSort

关于Timsort原理部分可以看一下b站视频讲解github别人整理的文档今天分析一下java的timsort相关的实现。java中的Arrays工具类的静态方法sort():public static void sort(Object[] a, int fromIndex, int toIndex) { /** *首先进行边界检查，可能会报出IllegalArgumentException（fromIndex>toIndex） *或者Arra
复制链接

扫一扫