当数据量大到无法一次性装入内存时,如果要对数据排序,就需要使用外部排序。外部排序主要分为两个阶段:
1. 将数据拆分成若干可以用内部排序处理的小块(比如每块 10000 个元素),对每一块排序后将结果写入临时文件;2. 使用归并排序的思想,将各个有序的临时文件合并成一个。
代码实现:
首先创建数据:
/**
 * Generates the sample input file for the external sort and echoes it to
 * the console so the unsorted data can be inspected.
 */
public class CreateData {
    /** Path of the binary file that receives the generated integers. */
    private static final String DATA_PATH = "data/largeData.dat";
    /** Number of integers to generate. */
    private static final int DATA_COUNT = 100;
    /** Number of values printed on each console line. */
    private static final int VALUES_PER_LINE = 20;

    /**
     * Writes {@code DATA_COUNT} random integers to {@code data/largeData.dat}
     * and then reads them back, printing them in rows of
     * {@code VALUES_PER_LINE} values.
     *
     * @param args unused
     * @throws IOException if the data file cannot be written or read
     */
    public static void main(String[] args) throws IOException {
        Random random = new Random();
        // try-with-resources closes (and flushes) the stream even if a write fails.
        try (DataOutputStream outputStream = FileUtils.getOutputStream(DATA_PATH)) {
            for (int i = 0; i < DATA_COUNT; i++) {
                // nextInt() % 100 keeps the sign, so values lie in (-100, 100).
                outputStream.writeInt(random.nextInt() % 100);
            }
        }
        try (DataInputStream inputStream = FileUtils.getInputStream(DATA_PATH)) {
            for (int i = 0; i < DATA_COUNT; i++) {
                System.out.print(inputStream.readInt() + " ");
                // Break the line after every full row, not before its last value.
                if ((i + 1) % VALUES_PER_LINE == 0) {
                    System.out.println();
                }
            }
        }
    }
}
外部排序实现:
// External sort
/**
 * Sorts the integers stored in {@code data/largeData.dat} without holding
 * them all in memory at once.
 *
 * <p>Phase 1 ({@link #readAndSort}) cuts the input into chunks of at most
 * {@link #MAX_LENGTH} integers, sorts each chunk in memory and writes it to a
 * run file {@code data/s1.dat}, {@code data/s2.dat}, ... Phase 2
 * ({@link #mergeResultFile}) repeatedly merges run files pairwise until the
 * complete sorted result is in {@code data/s1.dat}.
 */
public class ExternalSort {
    /** Maximum number of integers that are sorted in memory at once. */
    public static final int MAX_LENGTH = 10;

    /**
     * Runs both phases of the external sort.
     *
     * @param args unused
     * @throws IOException if any data file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        int[] list = new int[MAX_LENGTH];
        int n = readAndSort(list);
        mergeResultFile(n);
    }

    /**
     * Merges the run files {@code s1..sn} from both ends towards the middle:
     * {@code s1} with {@code sn}, {@code s2} with {@code s(n-1)}, and so on.
     * Each merged result replaces the lower-numbered file, so one round
     * leaves the live runs in {@code s1..s(n - n/2)}; with an odd count the
     * middle file is carried over unmerged. Rounds repeat until only
     * {@code s1} remains. A count of 0 or 1 needs no merging and returns
     * immediately.
     *
     * @param n number of run files
     * @throws IOException if a run file cannot be read or written
     */
    public static void mergeResultFile(int n) throws IOException {
        int remaining = n;
        while (remaining > 1) {
            int pairs = remaining / 2;
            for (int i = 1; i <= pairs; i++) {
                mergeData("s" + i, "s" + (remaining - i + 1));
            }
            remaining -= pairs;
        }
    }

    /**
     * Merges two sorted run files into {@code data/temp.dat} and then copies
     * the merged result over the first file.
     *
     * @param s1 base name (without directory or extension) of the first run;
     *           receives the merged result
     * @param s2 base name of the second run
     * @throws IOException if a file cannot be read or written
     */
    public static void mergeData(String s1, String s2) throws IOException {
        File file = new File("data/temp.dat");
        if (file.exists()) {
            file.delete();
        }
        try (DataInputStream input1 = FileUtils.getInputStream("data/" + s1 + ".dat");
             DataInputStream input2 = FileUtils.getInputStream("data/" + s2 + ".dat");
             DataOutputStream temp = FileUtils.getOutputStream("data/temp.dat")) {
            // Heads of both runs; null means that run is exhausted.
            Integer a = readNext(input1);
            Integer b = readNext(input2);
            while (a != null && b != null) {
                // Emit the smaller head; on ties the value from s1 goes first.
                if (a > b) {
                    temp.writeInt(b);
                    b = readNext(input2);
                } else {
                    temp.writeInt(a);
                    a = readNext(input1);
                }
            }
            // At most one of the two loops below does any work: it drains
            // whichever run still has values, including when the other run
            // was empty from the start.
            while (a != null) {
                temp.writeInt(a);
                a = readNext(input1);
            }
            while (b != null) {
                temp.writeInt(b);
                b = readNext(input2);
            }
        }
        copyTempToS1(s1);
    }

    /**
     * Reads the next integer from the stream.
     *
     * @return the value read, or {@code null} once the end of the stream is reached
     */
    private static Integer readNext(DataInputStream in) throws IOException {
        try {
            return in.readInt();
        } catch (EOFException endOfRun) {
            return null;
        }
    }

    /**
     * Copies the content of {@code data/temp.dat} over the given run file.
     *
     * @param s1 base name of the run file to overwrite
     * @throws IOException if either file cannot be read or written
     */
    public static void copyTempToS1(String s1) throws IOException {
        // Opening the output stream truncates (or creates) the target file,
        // so no explicit delete/create is needed beforehand.
        try (DataInputStream temp = FileUtils.getInputStream("data/temp.dat");
             DataOutputStream output = FileUtils.getOutputStream("data/" + s1 + ".dat")) {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = temp.read(buffer)) != -1) {
                output.write(buffer, 0, count);
            }
        }
    }

    /**
     * Reads {@code data/largeData.dat} chunk by chunk, sorts every chunk and
     * writes it to its own run file; run files are named {@code s1}, {@code s2}, ...
     *
     * <p>Only the values actually read for a chunk are sorted, so a short
     * final chunk is not polluted by values left in the buffer from the
     * previous chunk. An empty trailing chunk (input size is an exact
     * multiple of {@link #MAX_LENGTH}) produces no file. An empty input still
     * produces an empty {@code s1.dat} so that a result file always exists.
     *
     * @param list reusable buffer; must hold at least {@link #MAX_LENGTH} values
     * @return number of run files written
     * @throws IOException if the input or a run file cannot be accessed
     */
    public static int readAndSort(int[] list) throws IOException {
        int index = 0;
        try (DataInputStream inputStream = FileUtils.getInputStream("data/largeData.dat")) {
            while (true) {
                int len = readData(list, inputStream);
                if (len == 0 && index > 0) {
                    break;
                }
                index++;
                // Sort exactly the values read in this round.
                int[] chunk = list;
                if (len < list.length) {
                    chunk = new int[len];
                    System.arraycopy(list, 0, chunk, 0, len);
                }
                MutilThreadSort.sort(chunk);
                try (DataOutputStream outputStream =
                         FileUtils.getOutputStream("data/s" + index + ".dat")) {
                    for (int i = 0; i < len; i++) {
                        outputStream.writeInt(chunk[i]);
                    }
                }
                if (len < MAX_LENGTH) {
                    break;
                }
            }
        }
        return index;
    }

    /**
     * Fills the buffer with up to {@link #MAX_LENGTH} integers from the stream.
     *
     * @param list buffer to fill from index 0
     * @param inputStream stream to read from
     * @return number of integers stored; less than {@link #MAX_LENGTH} only
     *         when the end of the stream was reached
     * @throws IOException if reading fails for a reason other than end of stream
     */
    public static int readData(int[] list, DataInputStream inputStream) throws IOException {
        int count = 0;
        while (count < MAX_LENGTH) {
            int value;
            try {
                value = inputStream.readInt();
            } catch (EOFException endOfInput) {
                break;
            }
            list[count++] = value;
        }
        return count;
    }
}
内部排序的实现:
/**
 * Multi-threaded in-memory sort built on the fork/join framework.
 *
 * <p>Arrays longer than the task threshold are split in half, the halves are
 * sorted by parallel subtasks and then merged; shorter arrays are sorted
 * directly with quicksort.
 *
 * @author 清明
 */
public class MutilThreadSort {
    /**
     * Sorts the array in place in ascending order.
     *
     * @param list array to sort
     */
    public static void sort(int[] list) {
        ForkJoinPool forkJoinPool = new ForkJoinPool();
        try {
            forkJoinPool.invoke(new SortTask(list));
        } finally {
            // A pool is created per call, so release its worker threads here
            // instead of leaving them behind after every sort.
            forkJoinPool.shutdown();
        }
    }

    /** Fork/join task that sorts one array in place. */
    static class SortTask extends RecursiveAction {
        private static final long serialVersionUID = 1L;
        /** Array sorted by this task. */
        int[] list;
        /** Arrays up to this length are sorted directly instead of being split. */
        int max = 1000;

        public SortTask(int[] list) {
            this.list = list;
        }

        @Override
        protected void compute() {
            if (list.length <= max) {
                quickSort(list, 0, list.length - 1);
            } else {
                int mid = list.length / 2;
                // The halves are copies, so both subtasks work on disjoint
                // arrays and the merge can write straight back into list.
                int[] first = Arrays.copyOfRange(list, 0, mid);
                int[] second = Arrays.copyOfRange(list, mid, list.length);
                invokeAll(new SortTask(first), new SortTask(second));
                merge(list, first, second);
            }
        }
    }

    /**
     * Quicksort of {@code list[start..end]} (both bounds inclusive), using
     * the first element of the range as pivot.
     *
     * @param list array to sort in place
     * @param start index of the first element of the range
     * @param end index of the last element of the range
     */
    public static void quickSort(int[] list, int start, int end) {
        if (start >= end) {
            return;
        }
        // The pivot is lifted out, leaving a "hole" at i that the two scans
        // fill alternately from the right and from the left.
        int k = list[start], i = start, j = end;
        while (i < j) {
            while (i < j && list[j] > k) {
                j--;
            }
            if (i < j) {
                list[i] = list[j];
                i++;
            }
            while (i < j && list[i] < k) {
                i++;
            }
            if (i < j) {
                list[j] = list[i];
                j--;
            }
        }
        list[i] = k;
        quickSort(list, start, i - 1);
        quickSort(list, i + 1, end);
    }

    /**
     * Merges two ascending arrays into {@code list}.
     *
     * @param list destination; must hold at least
     *             {@code first.length + second.length} elements
     * @param first first sorted input
     * @param second second sorted input
     */
    public static void merge(int[] list, int[] first, int[] second) {
        int i = 0, j = 0, k = 0;
        while (i < first.length && j < second.length) {
            if (compare(first[i], second[j])) {
                list[k] = first[i];
                i++;
            } else {
                list[k] = second[j];
                j++;
            }
            k++;
        }
        // Exactly one input can still hold elements; append that tail.
        while (i < first.length) {
            list[k++] = first[i++];
        }
        while (j < second.length) {
            list[k++] = second[j++];
        }
    }

    /**
     * Ordering predicate used by {@link #merge}.
     *
     * @return {@code true} if {@code a} is strictly less than {@code b}
     */
    public static boolean compare(int a, int b) {
        return a < b;
    }
}
File工具类:
/**
 * Helpers that open buffered binary streams on files.
 */
public class FileUtils {
    /**
     * Opens the file at the given path for buffered binary reading.
     *
     * @param path path of the file to read
     * @return a data input stream layered over a buffered file stream
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public static DataInputStream getInputStream(String path) throws FileNotFoundException {
        FileInputStream raw = new FileInputStream(path);
        BufferedInputStream buffered = new BufferedInputStream(raw);
        return new DataInputStream(buffered);
    }

    /**
     * Opens the file at the given path for buffered binary writing.
     *
     * @param path path of the file to write
     * @return a data output stream layered over a buffered file stream
     * @throws FileNotFoundException if the file cannot be opened for writing
     */
    public static DataOutputStream getOutputStream(String path) throws FileNotFoundException {
        FileOutputStream raw = new FileOutputStream(path);
        BufferedOutputStream buffered = new BufferedOutputStream(raw);
        return new DataOutputStream(buffered);
    }
}
744

被折叠的 条评论
为什么被折叠?



