针对大文件,一次无法全部读入内存,可以采用将内容保存到文件的方式,进行归并;
可以改进之处,即分割文件,多线程,多机器处理;可以大大提高效率
package com.xiva.demo.sort;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.Arrays;
import java.util.LinkedList;
import org.apache.commons.io.IOUtils;
public class SortPractice<E extends Comparable<E>>
{
private static int fileTempNum = 0;
/**
* 归并排序
* @param array
* @param low
* @param high [参数说明]
*
* @return void [返回类型说明]
* @exception throws [违例类型] [违例说明]
* @see [类、类#方法、类#成员]
*/
@SuppressWarnings("unchecked")
public void merge(E[] array, int low, int high)
{
int mid = (high + low) / 2;
int s1 = low;
int s2 = mid + 1;
Object[] tempArr = new Object[high - low + 1];
int tempIdx = 0;
while (s1 <= mid && s2 <= high)
{
if (array[s1].compareTo(array[s2]) <= 0)
{
tempArr[tempIdx] = array[s1++];
tempIdx++;
}
else
{
tempArr[tempIdx] = array[s2++];
tempIdx++;
}
}
if (s1 <= mid)
{
for (int i = s1; i <= mid; i++)
{
tempArr[tempIdx] = array[i];
tempIdx++;
}
}
if (s2 <= high)
{
for (int i = s2; i <= high; i++)
{
tempArr[tempIdx] = array[i];
tempIdx++;
}
}
for (int i = 0; i < tempArr.length; i++)
{
array[low + i] = (E)tempArr[i];
}
}
public void mergeSort(E[] array, int low, int high)
{
if (low < high)
{
mergeSort(array, low, (high + low) / 2);
mergeSort(array, (high + low) / 2 + 1, high);
merge(array, low, high);
}
}
public static void testmerge(int maxLineRead) throws IOException
{
File file = new File("E:\\data\\msisdn.txt");
LinkedList<String> numList = new LinkedList<String>();
BufferedReader bReader = new BufferedReader(new FileReader(file));
LineNumberReader lineReader = new LineNumberReader(new FileReader(file));
int tempFileNum = getLineNumber(lineReader, maxLineRead);
File[] fileArray = new File[tempFileNum];
for (int i = 0; i < tempFileNum; i++)
{
File tempFile = new File("E:\\data\\msisdnSort" + i + ".txt");
fileArray[i] = tempFile;
if (!tempFile.exists())
{
tempFile.createNewFile();
}
}
String line = bReader.readLine();
long startTime = System.currentTimeMillis();
int lineIndex = 1;
int fileIndex = 0;
while (line != null)
{
String trimLine = line.trim();
if (!trimLine.equals(""))
{
numList.add(trimLine);
}
line = bReader.readLine();
lineIndex++;
if (lineIndex > maxLineRead)
{
lineIndex = 1;
FileOutputStream oStream = new FileOutputStream(
fileArray[fileIndex], true);
outputResult2File(numList, oStream);
fileIndex++;
numList = new LinkedList<String>();
}
}
if (numList.size() > 0)
{
FileOutputStream oStream = new FileOutputStream(
fileArray[fileIndex], true);
outputResult2File(numList, oStream);
}
long endTime = System.currentTimeMillis();
System.out.println("MergeSort Time:" + (endTime - startTime));
mergeSortUseFile(fileArray);
long outTime = System.currentTimeMillis();
System.out.println("MergeFile Time:" + (outTime - endTime));
}
private static int getLineNumber(LineNumberReader lineReader,
int maxLineRead) throws IOException
{
String line = lineReader.readLine();
while (line != null)
{
line = lineReader.readLine();
}
int lineNum = lineReader.getLineNumber();
int tempFileNum = lineNum / maxLineRead;
if (lineNum % maxLineRead != 0)
{
tempFileNum = tempFileNum + 1;
}
return tempFileNum;
}
private static void outputResult2File(LinkedList<String> numList,
FileOutputStream oStream) throws IOException
{
SortPractice<String> sort = new SortPractice<String>();
int size = numList.size();
String[] array = new String[size];
for (int j = 0; j < size; j++)
{
array[j] = numList.removeFirst();
}
sort.mergeSort(array, 0, size - 1);
for (int i = 0; i < array.length; i++)
{
IOUtils.write(array[i], oStream);
IOUtils.write(System.getProperty("line.separator"), oStream);
}
}
public static void mergeSortUseFile(File[] fileArray) throws IOException
{
int arrLen = fileArray.length;
if (arrLen < 2)
{
return;
}
int tempFileNum = arrLen / 2;
if (arrLen % 2 != 0)
{
tempFileNum = tempFileNum + 1;
}
File[] tempFileArray = new File[tempFileNum];
for (int i = 0; i < tempFileNum; i++)
{
File tempFile = new File("E:\\data\\msisdnMerge" + fileTempNum + i + ".txt");
tempFileArray[i] = tempFile;
if (!tempFile.exists())
{
tempFile.createNewFile();
}
}
fileTempNum = fileTempNum + tempFileNum;
int fileIndex = 1;
while (fileIndex < arrLen)
{
mergeFile(fileArray[fileIndex - 1],
fileArray[fileIndex],
tempFileArray[fileIndex / 2]);
fileIndex = fileIndex + 2;
}
if(arrLen%2 != 0)
{
tempFileArray[tempFileNum - 1] = fileArray[arrLen - 1];
}
mergeSortUseFile(tempFileArray);
}
public static void mergeFile(File file1, File file2, File tempFile)
throws IOException
{
BufferedReader file1Reader = new BufferedReader(new FileReader(file1));
BufferedReader file2Reader = new BufferedReader(new FileReader(file2));
FileOutputStream oStream = new FileOutputStream(tempFile, true);
String obj1 = file1Reader.readLine();
String obj2 = file2Reader.readLine();
while (obj1 != null && obj2 != null)
{
if (obj1.compareTo(obj2) <= 0)
{
IOUtils.write(obj1, oStream);
IOUtils.write(System.getProperty("line.separator"), oStream);
obj1 = file1Reader.readLine();
}
else
{
IOUtils.write(obj2, oStream);
IOUtils.write(System.getProperty("line.separator"), oStream);
obj2 = file2Reader.readLine();
}
}
if (obj1 != null)
{
while (obj1 != null)
{
IOUtils.write(obj1, oStream);
IOUtils.write(System.getProperty("line.separator"), oStream);
obj1 = file1Reader.readLine();
}
}
if (obj2 != null)
{
while (obj2 != null)
{
IOUtils.write(obj2, oStream);
IOUtils.write(System.getProperty("line.separator"), oStream);
obj2 = file2Reader.readLine();
}
}
}
public static void testArraySort()
{
SortPractice<Integer> sort = new SortPractice<Integer>();
Integer num01 = new Integer(12);
Integer num02 = new Integer(13);
Integer num03 = new Integer(44);
Integer num04 = new Integer(3);
Integer num05 = new Integer(13);
Integer num06 = new Integer(5);
Integer num07 = new Integer(7);
Integer[] array = {num01, num02, num03, num04, num05, num06, num07,
num04, num06, num01};
sort.mergeSort(array, 0, array.length - 1);
System.out.println(Arrays.toString(array));
}
public static void main(String[] args) throws IOException
{
testmerge(100000);
}
}