要求:给定一个文件包含10亿个数,文件每一行为一个浮点数,找出最大的1万个
算法描述:
基本思路:每个浮点数按8B计算,10亿个约需8GB,无法一次性全部读入内存,因此采取分治法:先分批处理一部分数据,然后对各批处理后的结果进行合并
以下是算法流程,采用Pascal和Java的混合语法描述,第一个函数是主函数。
- float[] dataCompress()
- BEGIN
- // Main routine: reads the file chunk by chunk, keeps the 10000 largest
- // values of each chunk, merges the per-chunk candidates and returns the
- // overall 10000 largest values.
- List<float[]> list = new ArrayList<float[]>();
- WHILE not reach EOF
- // presumably returns the next 1,000,000 lines of the file as floats -- confirm against the reader
- float[] partFile = readFileTop1MilleLine();
- float[] top10Thousand = searchTop(partFile, 10000);
- list.add(top10Thousand);
- END WHILE
- // Empty file: there is nothing to merge, so return an empty result.
- IF list.size() == 0
- return new float[0];
- ENDIF
- float[][] parts = list.toArray(new float[list.size()][]);
- float[] merged = merge(parts);
- // merged holds the candidates of every chunk; only its first 10000 entries
- // are the answer (assumes merge returns values in descending order).
- int count = min(10000, merged.length);
- float[] result = new float[count];
- FOR int i = 0; i < count; i++
- result[i] = merged[i];
- ENDFOR
- return result;
- END
- float[] searchTop(float[] arr, int topCount)
- BEGIN
- // Selects the topCount largest values of arr and returns them sorted in
- // descending order, which is the order merge(float[], float[]) consumes.
- // arr is reordered in place. If arr has fewer than topCount elements
- // (e.g. the last chunk of the file), all of them are returned.
- int keep = min(topCount, arr.length);
- IF keep <= 0
- return new float[0];
- ENDIF
- // true when the smallest value in arr[0..keep) has to be recomputed
- boolean isSwap = true;
- // min must be a float: an int would truncate the value and let elements
- // smaller than the real minimum be swapped into the candidate area
- float min = arr[0];
- int minIndex = 0;
- FOR int currentIndex = keep; currentIndex < arr.length; currentIndex++
- IF isSwap == true
- min = arr[0];
- minIndex = 0;
- FOR(int i = 1; i < keep; i++)
- IF arr[i] < min
- min = arr[i];
- minIndex = i;
- ENDIF
- ENDFOR
- ENDIF
- IF arr[currentIndex] > min
- // the current element beats the weakest candidate: replace it
- swap(arr, currentIndex, minIndex);
- isSwap = true;
- ENDIF
- ELSE
- // candidate area unchanged, so the cached minimum is still valid
- isSwap = false;
- ENDELSE
- ENDFOR
- // Copy the candidates out and order them from largest to smallest.
- float[] top = new float[keep];
- FOR(int i = 0; i < keep; i++)
- top[i] = arr[i];
- ENDFOR
- Arrays.sort(top); // ascending
- FOR(int i = 0; i < keep / 2; i++)
- swap(top, i, keep - 1 - i); // reverse into descending order
- ENDFOR
- return top;
- END
- float[] merge(float[][] arr)
- BEGIN
- // Merges all arrays in arr into a single array by recursively splitting
- // arr into two halves and combining the halves with merge(float[], float[]).
- int len = arr.length;
- // No arrays at all: return an empty result instead of recursing forever.
- IF len == 0
- return new float[0];
- ENDIF
- IF len == 1
- return arr[0];
- ENDIF
- IF len == 2
- return merge(arr[0], arr[1]);
- ENDIF
- int leftPart = len / 2;
- int rightPart = len - leftPart;
- float[][] arr1 = new float[leftPart][];
- float[][] arr2 = new float[rightPart][];
- // Fill both halves from arr; without this the recursion merges empty slots.
- FOR int i = 0; i < leftPart; i++
- arr1[i] = arr[i];
- ENDFOR
- FOR int i = 0; i < rightPart; i++
- arr2[i] = arr[leftPart + i];
- ENDFOR
- float[] result1 = merge(arr1);
- float[] result2 = merge(arr2);
- return merge(result1, result2);
- END
- float[] merge(float[] arr1, float[] arr2)
- BEGIN
- // Merges two arrays that are each sorted in descending order into one
- // descending array containing every element of both inputs.
- int arr1Index = 0, arr2Index = 0, arr1Len = arr1.length, arr2Len = arr2.length;
- float[] result = new float[arr1Len + arr2Len];
- int resultIndex = 0;
- WHILE(arr1Index < arr1Len && arr2Index < arr2Len)
- // On a tie take arr1's head now; arr2's equal head is taken on a later
- // iteration, so duplicates are kept and still count toward the top N.
- IF arr1[arr1Index] >= arr2[arr2Index]
- result[resultIndex] = arr1[arr1Index];
- arr1Index++;
- ENDIF
- ELSE
- result[resultIndex] = arr2[arr2Index];
- arr2Index++;
- ENDELSE
- resultIndex++;
- ENDWHILE
- // At most one of the two loops below runs: copy the unfinished array's tail.
- WHILE arr1Index < arr1Len
- result[resultIndex] = arr1[arr1Index];
- arr1Index++;
- resultIndex++;
- ENDWHILE
- WHILE arr2Index < arr2Len
- result[resultIndex] = arr2[arr2Index];
- arr2Index++;
- resultIndex++;
- ENDWHILE
- return result;
- END