STL源码解析 - nth_element

最新推荐文章于 2024-03-19 17:18:33 发布

zinnc

最新推荐文章于 2024-03-19 17:18:33 发布

阅读量553

点赞数

分类专栏： C++

C++ 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

以下内容转自：http://blog.csdn.net/lifesider/article/details/6580240

nth_element 模板函数具有两个版本

[cpp]view plaincopy 
   
 // Overload 1: rearranges [_First, _Last) so that the element at position _Nth
 // is the one that would be there if the whole range were sorted; elements are
 // compared with operator<.
 template<class _RanIt>  
 void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last);  

[cpp]view plaincopy 
   
 // Overload 2: same contract as the three-argument overload, but the
 // "less than" relation is supplied by the caller through _Pred instead of
 // operator<.
 template<class _RanIt, class _Pr>  
 void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last, _Pr _Pred);  

其功能是对区间 [_First, _Last) 的元素进行重排：重排后位于位置 _Nth 的元素，与整个区间完全排序后位于该位置的元素相同；位置 _Nth 之前的所有元素都“不大于”它，位置 _Nth 之后的所有元素都“不小于”它。但 _Nth 前后两个子区间内部的元素并不保证有序。

第一个版本，比较操作默认使用小于操作符(operator<)；第二个版本，使用自定义谓词 "_Pred" 定义“小于”操作(Less Than)。

算法的空间复杂度为O(1)。

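算法主要分两部分实现：第一部分反复进行三路划分（把当前区间划分为小于、等于、大于基准值的三段），每次只保留包含 _Nth 的那一段继续处理；第二部分是当剩余区间的长度不超过阈值（_ISORT_MAX，STL 中为 32）时，对该区间进行插入排序。算法的平均时间复杂度为 O(N)；插入排序虽然是平方级的，但最多只作用于 32 个元素，可视为常数开销。由于该实现没有限制划分的次数，当划分极不均衡时，最坏情况下时间复杂度为 O(N^2)。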

下面针对第一个版本的算法源代码进行注释说明，版本为 Microsoft Visual Studio 2008 SP1 安装包中的 algorithm 文件

[cpp]view plaincopy 
   
 // Public entry point of the operator< overload. It does no work itself: each
 // iterator is passed through _CHECKED_BASE and handed to _Nth_element.
 // NOTE(review): _CHECKED_BASE is not defined in this excerpt; presumably it
 // unwraps a checked iterator to its underlying iterator - confirm.
 template<class _RanIt> inline  
 void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last)  
 {   // order Nth element, using operator<  
     _Nth_element(_CHECKED_BASE(_First), _CHECKED_BASE(_Nth), _CHECKED_BASE(_Last)); // forward to the internal implementation  
 }  

_Nth_element 函数实现，其中 _ISORT_MAX 值为 32。

[cpp]view plaincopy 
   
 template<class _RanIt> inline  
     void _Nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last)  
     {   // order Nth element, using operator<  
     _DEBUG_RANGE(_First, _Last);  
     for (; _ISORT_MAX < _Last - _First; )  
         {   // divide and conquer, ordering partition containing Nth  
         pair<_RanIt, _RanIt> _Mid =  
             std::_Unguarded_partition(_First, _Last);  
   
         if (_Mid.second <= _Nth)  
             _First = _Mid.second;  
         else if (_Mid.first <= _Nth)  
             return; // Nth inside fat pivot, done  
         else  
             _Last = _Mid.first;  
         }  
   
     // 插入排序  
     std::_Insertion_sort(_First, _Last);    // sort any remainder  
     }  

_Unguarded_partition 函数实现

[cpp]view plaincopy 
   
 // Partitions [_First, _Last) around a pivot value using operator< and returns
 // the pair (_Pfirst, _Plast) that delimits the run of elements equivalent to
 // the pivot. When the function returns, the lower scan has walked every
 // element in [_First, _Pfirst) and found it less than the pivot, and the upper
 // scan has walked every element in [_Plast, _Last) and found it greater.
 //
 // Working iterators:
 //   [_Pfirst, _Plast)  - current run of elements equivalent to the pivot
 //   _Gfirst            - upper scan position, moves from _Plast towards _Last
 //   _Glast             - lower scan position, moves from _Pfirst towards _First
 //
 // NOTE(review): _DEBUG_LT is not defined in this excerpt; it is used here
 // interchangeably with operator< - presumably a debug-checked "less than".
 template<class _RanIt> inline  
     pair<_RanIt, _RanIt> _Unguarded_partition(_RanIt _First, _RanIt _Last)  
     {   // partition [_First, _Last), using operator<  
     _RanIt _Mid = _First + (_Last - _First) / 2;    // sort median to _Mid  
     std::_Median(_First, _Mid, _Last - 1);  // order the sampled elements; note the last argument is the last element, not the end  
     _RanIt _Pfirst = _Mid;  
     _RanIt _Plast = _Pfirst + 1;    // the pivot run starts out as [_Mid, _Mid + 1)  
   
     // The next two loops grow the pivot run outward over directly adjacent  
     // elements that are equivalent to the pivot (neither is less than the other)  
     while (_First < _Pfirst  
         && !_DEBUG_LT(*(_Pfirst - 1), *_Pfirst)  
         && !(*_Pfirst < *(_Pfirst - 1)))  
         --_Pfirst;  
     while (_Plast < _Last  
         && !_DEBUG_LT(*_Plast, *_Pfirst)  
         && !(*_Pfirst < *_Plast))  
         ++_Plast;  
   
     // The pivot run is now [_Pfirst, _Plast) and all its elements are equivalent  
     _RanIt _Gfirst = _Plast;  
     _RanIt _Glast = _Pfirst;  
   
     for (; ; )  
         {   // partition  
         // upper part: scan forward from _Gfirst  
         for (; _Gfirst < _Last; ++_Gfirst)  
             if (_DEBUG_LT(*_Pfirst, *_Gfirst))  // greater than the pivot: already on the right side, keep scanning  
                 ;  
             else if (*_Gfirst < *_Pfirst)        // less than the pivot: stop, it has to move below  
                 break;  
             else  
                 std::iter_swap(_Plast++, _Gfirst);  // equivalent to the pivot: swap it onto the end of the pivot run, which grows by one  
         // lower part: scan backward from _Glast  
         for (; _First < _Glast; --_Glast)  
             if (_DEBUG_LT(*(_Glast - 1), *_Pfirst)) // less than the pivot: already on the right side, keep scanning  
                 ;  
             else if (*_Pfirst < *(_Glast - 1))       // greater than the pivot: stop, it has to move above  
                 break;  
             else  
                 std::iter_swap(--_Pfirst, _Glast - 1);  // equivalent to the pivot: swap it onto the front of the pivot run, which grows by one  
   
         // both scans ran off their ends: the whole range has been processed  
         if (_Glast == _First && _Gfirst == _Last)  
             return (pair<_RanIt, _RanIt>(_Pfirst, _Plast));  
   
         // the lower scan reached _First, so *_Gfirst is a small element with no partner to swap with  
         if (_Glast == _First)  
             {   // no room at bottom, rotate pivot upward  
             if (_Plast != _Gfirst)  
                 std::iter_swap(_Pfirst, _Plast);    // when taken, *_Pfirst temporarily holds the larger element that sat at _Plast  
             ++_Plast;                               // extend the pivot run by one at the top  
             std::iter_swap(_Pfirst++, _Gfirst++);   // the small element lands in the old first slot of the pivot run; net effect: the run shifts right by one  
             }  
         else if (_Gfirst == _Last)  // the upper scan reached _Last, so *(_Glast - 1) is a large element with no partner  
             {   // no room at top, rotate pivot downward  
             if (--_Glast != --_Pfirst)  
                 std::iter_swap(_Glast, _Pfirst);    // when taken, *_Pfirst temporarily holds the larger element  
             std::iter_swap(_Pfirst, --_Plast);  // the large element lands in the old last slot of the pivot run; net effect: the run shifts left by one  
             }  
         else  
             std::iter_swap(_Gfirst++, --_Glast);    // after the swap: *_Glast < *_Pfirst < *(_Gfirst - 1)  
         }  
     }  

_Median 和 _Med3 两个函数，其作用是对区间内选定的几个采样元素进行排序，使这些采样元素的中位数落在 _Mid 位置，作为划分时使用的基准值

[cpp]view plaincopy 
   
 // Orders, in place, the three elements designated by _First, _Mid and _Last
 // so that their median ends up at _Mid. All three iterators are
 // dereferenced, so each must designate an element.
 template<class _RanIt> inline
     void _Med3(_RanIt _First, _RanIt _Mid, _RanIt _Last)
     {
     if (_DEBUG_LT(*_Mid, *_First))
         {   // first pair out of order
         std::iter_swap(_Mid, _First);
         }

     if (_DEBUG_LT(*_Last, *_Mid))
         {   // second pair out of order
         std::iter_swap(_Last, _Mid);
         }

     if (_DEBUG_LT(*_Mid, *_First))
         {   // the previous swap may have disturbed the first pair: check it again
         std::iter_swap(_Mid, _First);
         }
     }
   
 template<class _RanIt> inline  
     void _Median(_RanIt _First, _RanIt _Mid, _RanIt _Last)  
     {   // sort median element to middle  
     if (40 < _Last - _First)  
         {   // median of nine - 9 端点排序  
         size_t _Step = (_Last - _First + 1) / 8;  
         std::_Med3(_First, _First + _Step, _First + 2 * _Step);  
         std::_Med3(_Mid - _Step, _Mid, _Mid + _Step);  
         std::_Med3(_Last - 2 * _Step, _Last - _Step, _Last);  
         std::_Med3(_First + _Step, _Mid, _Last - _Step);  
         }  
     else  
         std::_Med3(_First, _Mid, _Last);  
     }  

对于第二个版本，算法思想相同，只是要做比较操作时，将用 _Pred 替换 operator< 操作符，同时也看到算法的核心主要在于 _Unguarded_partition 这个函数。

_Insertion_sort 函数，插入排序

[cpp]view plaincopy 
   
 // Insertion sort of [_First, _Last) using operator<. This wrapper only
 // forwards to _Insertion_sort1; the extra argument _Val_type(_First) lines up
 // with that function's unnamed `_Ty *` parameter, which lets it name the
 // element type _Ty for its temporary.
 // NOTE(review): _Val_type is not defined in this excerpt - confirm.
 template<class _BidIt> inline  
     void _Insertion_sort(_BidIt _First, _BidIt _Last)  
     {   // insertion sort [_First, _Last), using operator<  
     std::_Insertion_sort1(_First, _Last, _Val_type(_First)); // forward to _Insertion_sort1  
     }  

_Insertion_sort1 函数

[cpp]view plaincopy 
   
 template<class _BidIt,  
     class _Ty> inline  
     void _Insertion_sort1(_BidIt _First, _BidIt _Last, _Ty *)  
     {   // insertion sort [_First, _Last), using operator<  
     if (_First != _Last)  
         for (_BidIt _Next = _First; ++_Next != _Last; )  
             {   // order next element  
             _BidIt _Next1 = _Next;  
             _Ty _Val = *_Next;  
   
             // 小于首值时，整体后移，有可能使用 memmove，因而存在优化  
             if (_DEBUG_LT(_Val, *_First))  
                 {   // found new earliest element, move to front - [_First, _Next) => [..., ++Next1)  
                 _STDEXT unchecked_copy_backward(_First, _Next, ++_Next1);  
                 *_First = _Val;  
                 }  
             else  
                 {   // look for insertion point after first  
                 for (_BidIt _First1 = _Next1;  
                     _DEBUG_LT(_Val, *--_First1);  
                     _Next1 = _First1)  
                     *_Next1 = *_First1; // move hole down - 逐项后移  
                 *_Next1 = _Val; // insert element in hole  
                 }  
             }  
     }  

至此，我们已经完全理解 nth_element 的算法思想了，并且明白为何它的时间复杂度和空间复杂度都很低。当不需要对某个数组进行全部排序，而只想找出按某种排序准则（operator< 或自定义的 _Pred）排在第 N 位的值时，便可采用此算法。同时需要注意的是，此算法只对“随机访问迭代器”有效（如 vector）；如果需要对 list 使用此算法，可先将 list 的所有元素拷贝至 vector（或者只存储 list::iterator，对拷贝代价较高的自定义类型效率更高），再使用此算法。

代码版本来源于Microsoft Visual Studio 2008 安装包中<algorithm>文件，版权归原作者所有！