2014-04-29 02:27
题目:找出10亿个数中最小的100万个数,假设内存可以装得下。
解法1:内存可以装得下?可以用快速选择算法得到无序的结果。时间复杂度总体是O(n)级别,但是常系数不小。
代码:
1 // 18.6 Find the smallest one million number among one billion numbers. 2 // Suppose one billion numbers can fit in memory. 3 // I'll use quick selection algorithm to find them. This will return an unsorted result. 4 // Time complexity is O(n), but the constant factor may be massive. I don't quite like this algorithm. 5 #include <algorithm> 6 #include <iostream> 7 #include <vector> 8 using namespace std; 9 10 const int CUT_OFF = 3; 11 12 int medianThree(vector<int> &v, int ll, int rr) 13 { 14 int mm = (ll + rr) / 2; 15 16 if (v[ll] > v[mm]) { 17 swap(v[ll], v[mm]); 18 } 19 if (v[ll] > v[rr]) { 20 swap(v[ll], v[rr]); 21 } 22 if (v[mm] > v[rr]) { 23 swap(v[mm], v[rr]); 24 } 25 swap(v[mm], v[rr - 1]); 26 return v[rr - 1]; 27 } 28 29 void quickSelect(vector<int> &v, int ll, int rr, int k) 30 { 31 // reference from "Data Structure and Algorithm Analysis in C" by Mark Allen Weiss. 32 int pivot; 33 int i, j; 34 35 if (ll + CUT_OFF <= rr) { 36 pivot = medianThree(v, ll, rr); 37 i = ll; 38 j = rr - 1; 39 40 while (true) { 41 while (v[++i] < pivot); 42 while (v[--j] > pivot); 43 if (i > j) { 44 break; 45 } 46 swap(v[i], v[j]); 47 } 48 swap(v[i], v[rr - 1]); 49 50 if (k < i) { 51 return quickSelect(v, ll, i - 1, k); 52 } else if (k > i) { 53 return quickSelect(v, i + 1, rr, k); 54 } 55 } else { 56 for (i = ll; i <= rr; ++i) { 57 for (j = i + 1; j <= rr; ++j) { 58 if (v[i] > v[j]) { 59 swap(v[i], v[j]); 60 } 61 } 62 } 63 } 64 } 65 66 int main() 67 { 68 vector<int> v; 69 vector<int> res; 70 int n, k; 71 int i; 72 int k_small, count; 73 74 while (cin >> n >> k && (n > 0 && k > 0)) { 75 v.resize(n); 76 for (i = 0; i < n; ++i) { 77 cin >> v[i]; 78 } 79 80 // find the kth smallest number 81 // this will change the order of elements 82 quickSelect(v, 0, n - 1, k - 1); 83 k_small = v[k - 1]; 84 count = k; 85 for (i = 0; i < n; ++i) { 86 if (v[i] < k_small) { 87 --count; 88 } 89 } 90 for (i = 0; i < n; ++i) { 91 if (v[i] < k_small) { 92 res.push_back(v[i]); 93 } else if (v[i] == k_small && count > 0) { 94 res.push_back(v[i]); 95 --count; 96 } 97 } 98 99 cout << '{'; 100 for (i = 0; i < k; ++i) { 101 i ? (cout << ' '), 1 : 1; 102 cout << res[i]; 103 } 104 cout << '}' << endl; 105 106 v.clear(); 107 res.clear(); 108 } 109 110 return 0; 111 }
解法2:如果要求结果也是有序的,那可以用最大堆得到有序结果。时间复杂度是O(n * log(m))级别,思路和代码相比快速选择算法都更简单,不过效率低了些。
代码:
1 // 18.6 Find the smallest one million number among one billion numbers. 2 // Suppose one billion numbers can fit in memory. 3 // I'll use a max heap, which runs in O(n * log(k)) time, returns a sorted result. 4 #include <iostream> 5 #include <queue> 6 #include <vector> 7 using namespace std; 8 9 template <class T> 10 struct myless { 11 bool operator () (const T &x, const T &y) { 12 return x < y; 13 }; 14 }; 15 16 int main() 17 { 18 int val; 19 int n, k; 20 int i; 21 // max heap 22 priority_queue<int, vector<int>, myless<int> > q; 23 vector<int> v; 24 25 while (cin >> n >> k && (n > 0 && k > 0)) { 26 k = k < n ? k : n; 27 for (i = 0; i < k; ++i) { 28 cin >> val; 29 q.push(val); 30 } 31 32 for (i = k; i < n; ++i) { 33 cin >> val; 34 if (q.top() > val) { 35 q.pop(); 36 q.push(val); 37 } 38 } 39 while (!q.empty()) { 40 v.push_back(q.top()); 41 q.pop(); 42 } 43 reverse(v.begin(), v.end()); 44 45 cout << '{'; 46 for (i = 0; i < k; ++i) { 47 i ? (cout << ' '), 1 : 1; 48 cout << v[i]; 49 } 50 cout << '}' << endl; 51 52 v.clear(); 53 } 54 55 return 0; 56 }