外部排序

当数据量大到无法一次性全部装入内存时,就不能直接使用内部排序,而需要使用外部排序。外部排序主要分为两个阶段:

1. 将数据拆分成若干个可以用内部排序处理的小块(比如每块 10000 个数据),对每一块排序后把结果写入一个临时文件。
2. 使用归并排序的思想,将各个有序的临时文件两两合并,直到最终合并成一个有序文件。

代码实现:

首先创建数据:

/**
 * Generates the sample input for the external-sort demo.
 *
 * <p>Writes {@link #COUNT} random ints in binary form to {@code data/largeData.dat},
 * then reads the file back and echoes the values to standard output so the
 * generated data can be inspected.
 */
public class CreateData {
	/** Number of ints written to, and echoed back from, the data file. */
	private static final int COUNT = 100;

	/** Number of values printed per output line when echoing the file. */
	private static final int PER_LINE = 20;

	/** Location of the generated data file. */
	private static final String DATA_FILE = "data/largeData.dat";

	/**
	 * Creates the data file and prints its contents.
	 *
	 * @param args ignored
	 * @throws IOException if the data file cannot be written or read back
	 */
	public static void main(String[] args) throws IOException {
		Random random = new Random();

		// try-with-resources guarantees the stream is flushed and closed even if a write fails
		try (DataOutputStream outputStream = FileUtils.getOutputStream(DATA_FILE)) {
			for (int i = 0; i < COUNT; i++) {
				// the remainder keeps the sign of nextInt(), so values fall in -99..99
				outputStream.writeInt(random.nextInt() % 100);
			}
		}

		try (DataInputStream inputStream = new DataInputStream(
				new BufferedInputStream(
				new FileInputStream(DATA_FILE)))) {
			for (int i = 0; i < COUNT; i++) {
				System.out.print(inputStream.readInt() + " ");
				// break the line AFTER every PER_LINE-th value so all rows have equal length
				if ((i + 1) % PER_LINE == 0) {
					System.out.println();
				}
			}
		}
	}
}

外部排序实现:

/**
 * External sort of the ints stored in {@code data/largeData.dat}.
 *
 * <p>Phase 1 ({@link #readAndSort}) splits the input into runs of at most
 * {@link #MAX_LENGTH} ints, sorts each run in memory and writes it to
 * {@code data/s1.dat}, {@code data/s2.dat}, ... Phase 2 ({@link #mergeResultFile})
 * repeatedly merges pairs of run files until a single sorted file,
 * {@code data/s1.dat}, remains.
 */
public class ExternalSort {
	/** Maximum number of ints read into memory at once, i.e. the size of one sorted run. */
	public static final int MAX_LENGTH = 10;

	/**
	 * Runs both phases of the external sort.
	 *
	 * @param args ignored
	 * @throws IOException if any data or temporary file cannot be read or written
	 */
	public static void main(String[] args) throws IOException {
		int[] list = new int[MAX_LENGTH];

		int n = readAndSort(list);

		mergeResultFile(n);
	}

	/**
	 * Merges the run files {@code s1..sn} head-to-tail until only {@code s1} is left.
	 *
	 * <p>In each pass file {@code s(i)} is merged with {@code s(n-i+1)} and the result
	 * is stored back in {@code s(i)}; when {@code n} is odd the middle file is carried
	 * over unchanged. The pass is then repeated on the first {@code ceil(n/2)} files.
	 *
	 * @param n number of run files; values of 1 or less leave everything untouched
	 * @throws IOException if a run file cannot be read or written
	 */
	public static void mergeResultFile(int n) throws IOException {
		int remaining = n;
		// "> 1" also stops on 0 or negative counts, which previously recursed forever
		while (remaining > 1) {
			int pairs = remaining / 2;
			for (int i = 1; i <= pairs; i++) {
				mergeData("s" + i, "s" + (remaining - i + 1));
			}
			// even: n/2 files remain; odd: n/2 merged files plus the untouched middle one
			remaining -= pairs;
		}
	}

	/**
	 * Merges two sorted run files into one sorted sequence and stores the result in
	 * the first file. {@code data/temp.dat} is used as scratch space.
	 *
	 * @param s1 base name (without directory or extension) of the first run; receives the result
	 * @param s2 base name of the second run
	 * @throws IOException if either run or the scratch file cannot be read or written
	 */
	public static void mergeData(String s1, String s2) throws IOException {
		try (DataInputStream input1 = FileUtils.getInputStream("data/" + s1 + ".dat");
				DataInputStream input2 = FileUtils.getInputStream("data/" + s2 + ".dat");
				DataOutputStream temp = FileUtils.getOutputStream("data/temp.dat")) {

			int a = 0, b = 0;
			// pendingA / pendingB: a value has been read from that input but not yet written
			boolean pendingA = false, pendingB = false;
			while (true) {
				if (!pendingA) {
					try {
						a = input1.readInt();
						pendingA = true;
					} catch (EOFException e) {
						break; // first run exhausted
					}
				}

				if (!pendingB) {
					try {
						b = input2.readInt();
						pendingB = true;
					} catch (EOFException e) {
						break; // second run exhausted
					}
				}

				// emit the smaller head; ties take the value from the first run
				if (a > b) {
					temp.writeInt(b);
					pendingB = false;
				} else {
					temp.writeInt(a);
					pendingA = false;
				}
			}

			// At most one head value is still pending; write it before the tail of its run.
			if (pendingA) {
				temp.writeInt(a);
			}
			if (pendingB) {
				temp.writeInt(b);
			}
			// Drain both inputs unconditionally: the exhausted one contributes nothing, and
			// this also covers an empty first run, where no value of the second run was read.
			copyRemaining(input1, temp);
			copyRemaining(input2, temp);
		}

		copyTempToS1(s1);
	}

	/**
	 * Overwrites a run file with the contents of {@code data/temp.dat}.
	 *
	 * @param s1 base name (without directory or extension) of the file to overwrite
	 * @throws IOException if the scratch file cannot be read or the target cannot be written
	 */
	public static void copyTempToS1(String s1) throws IOException {
		// opening the output stream truncates an existing file, so no explicit delete is needed
		try (DataInputStream temp = FileUtils.getInputStream("data/temp.dat");
				DataOutputStream output = FileUtils.getOutputStream("data/" + s1 + ".dat")) {
			copyRemaining(temp, output);
		}
	}

	/**
	 * Reads the input file in chunks, sorts each chunk and writes it to its own run
	 * file, named {@code data/s1.dat}, {@code data/s2.dat}, ...
	 *
	 * <p>At least one run file is always produced, even for empty input, so
	 * {@code data/s1.dat} always exists afterwards.
	 *
	 * @param list in-memory buffer for one chunk; must hold at least {@link #MAX_LENGTH} ints
	 * @return number of run files written
	 * @throws IOException if the input cannot be read or a run file cannot be written
	 */
	public static int readAndSort(int[] list) throws IOException {
		int index = 0;
		try (DataInputStream inputStream =
				FileUtils.getInputStream("data/largeData.dat")) {
			while (true) {
				int len = readData(list, inputStream);
				// input length was an exact multiple of MAX_LENGTH: skip the empty trailing run
				if (len == 0 && index > 0) {
					break;
				}
				index++;

				// Sort only the values just read. A partly filled buffer still holds stale
				// values from the previous chunk, which must not take part in the sort.
				int[] run = list;
				if (len < list.length) {
					run = new int[len];
					System.arraycopy(list, 0, run, 0, len);
				}
				MutilThreadSort.sort(run);

				try (DataOutputStream outputStream =
						FileUtils.getOutputStream("data/s" + index + ".dat")) {
					for (int i = 0; i < len; i++) {
						outputStream.writeInt(run[i]);
					}
				}

				// a short chunk means the end of the input was reached
				if (len < MAX_LENGTH) {
					break;
				}
			}
		}
		return index;
	}

	/**
	 * Fills the buffer with up to {@link #MAX_LENGTH} ints from the stream.
	 *
	 * @param list        destination buffer; must hold at least {@link #MAX_LENGTH} ints
	 * @param inputStream stream to read from
	 * @return number of ints stored, fewer than {@link #MAX_LENGTH} once the stream is exhausted
	 * @throws IOException if reading fails for a reason other than end of stream
	 */
	public static int readData(int[] list, DataInputStream inputStream) throws IOException {
		int count = 0;
		while (count < MAX_LENGTH) {
			try {
				list[count] = inputStream.readInt();
			} catch (EOFException e) {
				break; // end of stream: return what has been read so far
			}
			count++;
		}
		return count;
	}

	// Copies every remaining int from in to out; stops silently at end of stream.
	private static void copyRemaining(DataInputStream in, DataOutputStream out) throws IOException {
		while (true) {
			try {
				out.writeInt(in.readInt());
			} catch (EOFException e) {
				return;
			}
		}
	}
}

内部排序的实现:

/**
 * Multi-threaded in-memory sort of an int array, built on the fork/join framework.
 *
 * <p>Arrays of up to 1000 elements are sorted directly with quicksort; larger
 * arrays are split in half, the halves are sorted as parallel subtasks and the
 * results are merged back into the original array.
 *
 * @author 清明
 */
public class MutilThreadSort {

	/**
	 * Sorts the array in ascending order, in place.
	 *
	 * @param list array to sort; may be empty
	 */
	public static void sort(int[] list) {
		ForkJoinPool forkJoinPool = new ForkJoinPool();
		try {
			forkJoinPool.invoke(new SortTask(list));
		} finally {
			// a dedicated pool is created per call; shut it down so its
			// worker threads do not accumulate across calls
			forkJoinPool.shutdown();
		}
	}

	/** Fork/join task that sorts {@link #list} in place. */
	static class SortTask extends RecursiveAction {

		private static final long serialVersionUID = 1L;
		/** Array this task sorts. */
		int[] list;
		/** Largest array length that is sorted sequentially instead of being split. */
		int max = 1000;

		public SortTask(int[] list) {
			this.list = list;
		}

		@Override
		protected void compute() {
			if (list.length <= max) {
				quickSort(list, 0, list.length - 1);
			} else {
				// sort independent copies of the two halves in parallel ...
				int mid = list.length / 2;
				int[] first = Arrays.copyOfRange(list, 0, mid);
				int[] second = Arrays.copyOfRange(list, mid, list.length);

				invokeAll(new SortTask(first), new SortTask(second));

				// ... then merge them back over the original array
				merge(list, first, second);
			}
		}

	}

	/**
	 * Quicksort of {@code list[start..end]} (both bounds inclusive), in place.
	 * The first element of the range is used as the pivot.
	 *
	 * @param list  array containing the range to sort
	 * @param start index of the first element of the range
	 * @param end   index of the last element of the range; a range with
	 *              {@code start >= end} is left untouched
	 */
	public static void quickSort(int[] list, int start, int end) {
		if (start >= end)
			return;

		// taking the pivot out leaves a "hole" at i that the scans below fill alternately
		int k = list[start], i = start, j = end;

		while (i < j) {
			// scan from the right for a value that is not greater than the pivot
			while (i < j && list[j] > k) {
				j--;
			}
			if (i < j) {
				list[i] = list[j];
				i++;
			}

			// scan from the left for a value that is not smaller than the pivot
			while (i < j && list[i] < k) {
				i++;
			}

			if (i < j) {
				list[j] = list[i];
				j--;
			}
		}
		// i == j is the final position of the pivot
		list[i] = k;
		quickSort(list, start, i - 1);
		quickSort(list, i + 1, end);
	}

	/**
	 * Merges two sorted arrays into {@code list} in ascending order.
	 *
	 * @param list   destination; must hold at least {@code first.length + second.length} elements
	 * @param first  first sorted input
	 * @param second second sorted input
	 */
	public static void merge(int[] list, int[] first, int[] second) {
		int i = 0, j = 0, k = 0;
		while (i < first.length && j < second.length) {
			if (compare(first[i], second[j])) {
				list[k] = first[i];
				i++;
			} else {
				list[k] = second[j];
				j++;
			}
			k++;
		}

		// copy whatever is left of the first input
		while (i < first.length) {
			list[k++] = first[i++];
		}

		// copy whatever is left of the second input
		while (j < second.length) {
			list[k++] = second[j++];
		}

	}

	/**
	 * Ordering predicate used by {@link #merge}.
	 *
	 * @return {@code true} if {@code a} is strictly less than {@code b}
	 */
	public static boolean compare(int a, int b) {
		return a < b;
	}
}

File工具类:

/**
 * Helpers that open buffered binary data streams over a file path.
 */
public class FileUtils {

    /**
     * Opens a file for reading primitive values.
     *
     * @param path path of the file to read
     * @return a {@link DataInputStream} wrapping a buffered {@link FileInputStream} on {@code path}
     * @throws FileNotFoundException if the {@link FileInputStream} cannot be opened
     */
    public static DataInputStream getInputStream(String path) throws FileNotFoundException {
        FileInputStream raw = new FileInputStream(path);
        BufferedInputStream buffered = new BufferedInputStream(raw);
        return new DataInputStream(buffered);
    }

    /**
     * Opens a file for writing primitive values.
     *
     * @param path path of the file to write
     * @return a {@link DataOutputStream} wrapping a buffered {@link FileOutputStream} on {@code path}
     * @throws FileNotFoundException if the {@link FileOutputStream} cannot be opened
     */
    public static DataOutputStream getOutputStream(String path) throws FileNotFoundException {
        FileOutputStream raw = new FileOutputStream(path);
        BufferedOutputStream buffered = new BufferedOutputStream(raw);
        return new DataOutputStream(buffered);
    }
}

git地址:https://github.com/201531107001/gqm-practice

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值