Java NIO 实现外部归并排序算法

1. 生成原始数据文件

package www;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.Random;

/**
 * Generates the input for the merge-sort demo: a text file named
 * {@code t1.txt} holding 5 x 10,000,000 random integers in the range
 * [0, 100000000), one per line, using the platform line separator.
 */
public class Test {

	/**
	 * Writes the random numbers to {@code t1.txt} in the current directory.
	 *
	 * @param args ignored
	 * @throws Exception if the file cannot be created or written
	 */
	public static void main(String[] args) throws Exception {
		Random r = new Random();
		// try-with-resources flushes and closes the writer (and the wrapped
		// FileWriter) even when a write fails part-way through.
		try (BufferedWriter bw = new BufferedWriter(new FileWriter("t1.txt"))) {
			for (int x = 0; x < 5; x++) {
				// Stream each value straight to the writer; staging a batch in
				// a 10,000,000-element array first only costs memory.
				for (int i = 0; i < 10000000; i++) {
					bw.write(String.valueOf(r.nextInt(100000000)));
					bw.newLine();
				}
			}
		}
	}

}

2.归并文件算法实现

package www;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * External merge sort over a text file that holds one integer per line.
 *
 * <p>The source file is first cut into sorted run files named
 * {@code 0_<n>.txt} inside a working directory. The runs are then merged
 * pairwise, pass after pass: pass {@code p} reads every {@code p_*.txt} file
 * and writes {@code (p+1)_<n>.txt} files, until a single file remains. Files
 * of earlier passes are left in the working directory.
 */
public class Test2 {
	/** Capacity in bytes of each read/write buffer created by this class. */
	static int BLOCK_SIZE = 33 * 1024 * 1024;
	/** Working directory used by {@link #main(String[])}. */
	static String DIR_PATH = "temp";
	/** Source file sorted by {@link #main(String[])}. */
	static String root = "t1.txt";
	/** Number of integers sorted in memory for each initial run file. */
	static int CHUNK_LENGTH = 6500000;

	/**
	 * Empties the working directory and sorts {@link #root} into it.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		System.out.println("====清空工作文件夹!====");
		File dir = new File(DIR_PATH);
		dir.mkdir();
		File[] listFiles = dir.listFiles();
		// listFiles() returns null when the path is not a readable directory.
		if (listFiles != null) {
			for (File file : listFiles) {
				file.delete();
			}
		}

		merge_sort(root, DIR_PATH);
	}

	/**
	 * Sorts the integers of {@code root} using {@code workDir} for run files.
	 *
	 * @param root    path of the file to sort, one integer per line
	 * @param workDir existing directory that receives the run files
	 */
	static void merge_sort(String root, String workDir) {
		// Phase 1: split the source into sorted runs.
		splitBigFile(root, workDir);
		// Phase 2: merge the runs pairwise until one file is left.
		mergeFile(workDir);
	}

	/**
	 * Repeatedly merges the run files of the current pass in pairs until only
	 * one file remains, then prints its line count.
	 *
	 * @param workDir directory holding the {@code 0_*.txt} run files
	 * @throws IllegalStateException if an unpaired run cannot be renamed into
	 *                               the next pass
	 */
	private static void mergeFile(String workDir) {
		System.out.println("====开始归并数据====");
		printFreeMemory();
		File wdir = new File(workDir);
		int mergeTime = 0;
		while (true) {
			final String fileHeader = mergeTime + "_";
			// Prefix match: a substring match would also accept e.g.
			// "10_1.txt" for the header "0_".
			FileFilter passFiles = candidate -> candidate.getName().startsWith(fileHeader);
			File[] listFiles = wdir.listFiles(passFiles);
			if (listFiles == null || listFiles.length == 0) {
				// Nothing to merge (empty source or unreadable directory);
				// without this exit the loop would never terminate.
				System.err.println("====没有可归并的文件块====");
				break;
			}
			if (listFiles.length == 1) {
				System.err.println("====结束归并,最后合成文件为:" + listFiles[0].getName() + " ====");
				printFile(listFiles[0].getAbsolutePath(), false);
				break;
			}

			int nextPass = mergeTime + 1;
			int mergeNewTime = 1;
			for (int i = 0; i < listFiles.length; i += 2) {
				String targetPath = workDir + File.separator + nextPass + "_" + mergeNewTime + ".txt";
				if (i == listFiles.length - 1) {
					// Odd number of runs: the last one has no partner and is
					// carried over to the next pass unchanged.
					File newFile = new File(targetPath);
					System.err.println("==== 单数文件块,提升为下步骤归并块 ===");
					System.out.println(listFiles[i].getName() + " --> " + newFile.getName());
					if (!listFiles[i].renameTo(newFile)) {
						// Continuing would silently drop this run's data.
						throw new IllegalStateException(
								"cannot rename " + listFiles[i] + " to " + newFile);
					}
					printFreeMemory();
					break;
				}
				merge(listFiles[i].getAbsolutePath(), listFiles[i + 1].getAbsolutePath(), targetPath);
				++mergeNewTime;
			}
			mergeTime = nextPass;
		}
		System.out.println("====归并数据结束====");
	}

	/**
	 * Merges two ascending files into one ascending file.
	 *
	 * @param in_path1 first sorted input file
	 * @param in_path2 second sorted input file
	 * @param out_path file to create (replaced when it already exists)
	 */
	static void merge(String in_path1, String in_path2, String out_path) {
		System.out.println("====开始归并数据块文件====");
		System.out.println("====in1 :" + in_path1 + " in2:" + in_path2 + " out:" + out_path);
		printFreeMemory();
		// try-with-resources releases all three files even if a read, a parse
		// or a write fails in the middle of the merge.
		try (InputBuffer ib1 = new InputBuffer(BLOCK_SIZE, in_path1);
				InputBuffer ib2 = new InputBuffer(BLOCK_SIZE, in_path2);
				OutputBuffer ob = new OutputBuffer(BLOCK_SIZE, out_path)) {
			Integer r1 = ib1.read();
			Integer r2 = ib2.read();
			// Classic two-way merge: emit the smaller head, refill from its source.
			while (r1 != null && r2 != null) {
				if (r1.intValue() <= r2.intValue()) {
					ob.write(r1);
					r1 = ib1.read();
				} else {
					ob.write(r2);
					r2 = ib2.read();
				}
			}
			// At most one of the inputs still has values; copy its tail.
			for (; r1 != null; r1 = ib1.read()) {
				ob.write(r1);
			}
			for (; r2 != null; r2 = ib2.read()) {
				ob.write(r2);
			}
		}
		System.out.println("====归并数据块结束====");
		printFreeMemory();
	}

	/**
	 * Block-buffered reader that yields the integers of a text file one line
	 * at a time. Lines may end in CR, LF or CRLF; blank lines are skipped.
	 */
	static class InputBuffer implements AutoCloseable {
		// http://ifeve.com/buffers/
		/** Block buffer, kept in "read" (flipped) state between refills. */
		ByteBuffer array;
		String file_path;
		FileChannel inChannel;
		RandomAccessFile randomAccessFile;
		CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
		/** -1 once the end of the file has been reached. */
		int nextFlag = 0;
		/** Scratch space for the bytes of the line being assembled. */
		private final ByteBuffer line = ByteBuffer.allocate(48);

		/**
		 * Opens {@code file_path} for reading.
		 *
		 * @param capacity  size in bytes of the block buffer, must be positive
		 * @param file_path file to read
		 * @throws IllegalArgumentException if {@code capacity} is not positive
		 * @throws UncheckedIOException     if the file cannot be opened
		 */
		public InputBuffer(int capacity, String file_path) {
			if (capacity <= 0) {
				throw new IllegalArgumentException("capacity must be positive: " + capacity);
			}
			array = ByteBuffer.allocate(capacity);
			// Start out "empty" so the first read() triggers a refill.
			array.flip();
			this.file_path = file_path;
			try {
				// Read-only: "rw" would create a missing input file and
				// demand write permission for no reason.
				randomAccessFile = new RandomAccessFile(new File(file_path), "r");
				inChannel = randomAccessFile.getChannel();
			} catch (IOException e) {
				throw new UncheckedIOException("cannot open " + file_path, e);
			}
		}

		/**
		 * Reads the next integer.
		 *
		 * @return the next value, or {@code null} when the file is exhausted
		 * @throws NumberFormatException if a line is not a decimal integer or
		 *                               is longer than 48 bytes
		 * @throws UncheckedIOException  if reading the file fails
		 */
		public Integer read() {
			line.clear();
			while (true) {
				if (!array.hasRemaining()) {
					if (readFileDate() == -1) {
						nextFlag = -1;
						break;
					}
					continue;
				}
				byte b = array.get();
				// CR (0x0d) and LF (0x0a) both terminate a line.
				boolean lineBreak = b == 0x0d || b == 0x0a;
				if (lineBreak && line.position() == 0) {
					// Blank line, or the LF half of a CRLF pair.
					continue;
				}
				if (lineBreak) {
					break;
				}
				if (!line.hasRemaining()) {
					throw new NumberFormatException(
							"line longer than " + line.capacity() + " bytes in " + file_path);
				}
				line.put(b);
			}
			line.flip();
			if (!line.hasRemaining()) {
				return null;
			}
			return Integer.parseInt(decoderByte(line));
		}

		/** Decodes the collected line bytes as UTF-8 text. */
		private String decoderByte(ByteBuffer temp) {
			try {
				CharBuffer decode = utf8Decoder.decode(temp);
				return decode.toString();
			} catch (CharacterCodingException e) {
				throw new IllegalStateException("invalid UTF-8 data in " + file_path, e);
			}
		}

		/**
		 * Tells whether the end of the file has not been reached yet. The
		 * misspelled name is kept for existing callers.
		 *
		 * @return {@code false} once a read has hit the end of the file
		 */
		public boolean hashNext() {
			return nextFlag != -1;
		}

		/**
		 * Refills the block buffer from the channel.
		 *
		 * @return the number of bytes read, or -1 at end of file
		 */
		private int readFileDate() {
			array.clear();
			try {
				int read = inChannel.read(array);
				array.flip();
				return read;
			} catch (IOException e) {
				// Leave the buffer empty rather than exposing stale bytes.
				array.limit(0);
				throw new UncheckedIOException("cannot read " + file_path, e);
			}
		}

		/** Releases the buffer and closes the file; safe to call repeatedly. */
		@Override
		public void close() {
			array = null;
			if (inChannel != null) {
				try {
					inChannel.close();
					randomAccessFile.close();
				} catch (IOException e) {
					// Nothing was written through this handle, so a failed
					// close cannot lose data; report it and carry on.
					System.err.println("failed to close " + file_path + ": " + e);
				}
				inChannel = null;
			}
		}

	}

	/**
	 * Block-buffered writer that stores integers as decimal text, one per
	 * line, each terminated by CRLF.
	 */
	static class OutputBuffer implements AutoCloseable {
		ByteBuffer array;
		String file_path;
		FileChannel outChannel;
		RandomAccessFile randomAccessFile;
		/** Line terminator: CR LF. */
		byte[] RN = new byte[] { 0x0d, 0x0a };

		/**
		 * Creates (or replaces) {@code file_path}.
		 *
		 * @param capacity  size in bytes of the block buffer
		 * @param file_path file to write
		 * @throws UncheckedIOException if the file cannot be opened
		 */
		public OutputBuffer(int capacity, String file_path) {
			array = ByteBuffer.allocate(capacity);
			this.file_path = file_path;
			try {
				randomAccessFile = new RandomAccessFile(new File(file_path), "rw");
				// "rw" keeps existing content; drop it so no stale bytes
				// survive past the end of the new data.
				randomAccessFile.setLength(0);
				outChannel = randomAccessFile.getChannel();
			} catch (IOException e) {
				if (randomAccessFile != null) {
					try {
						randomAccessFile.close();
					} catch (IOException ignored) {
						// The original failure is the one reported below.
					}
				}
				throw new UncheckedIOException("cannot open " + file_path + " for writing", e);
			}
		}

		/**
		 * Appends {@code val} followed by CRLF, flushing the block buffer to
		 * disk first when it is too full to hold the new line.
		 *
		 * @param val value to write
		 * @throws UncheckedIOException if flushing to disk fails
		 */
		public void write(int val) {
			byte[] digits = String.valueOf(val).getBytes(StandardCharsets.UTF_8);
			if (array.remaining() < digits.length + RN.length) {
				writeDisk();
			}
			array.put(digits);
			array.put(RN);
		}

		/** Writes the buffered bytes to the channel and empties the buffer. */
		private void writeDisk() {
			array.flip();
			try {
				// A single write() may transfer only part of the buffer.
				while (array.hasRemaining()) {
					outChannel.write(array);
				}
			} catch (IOException e) {
				throw new UncheckedIOException("cannot write " + file_path, e);
			} finally {
				array.clear();
			}
		}

		/**
		 * Flushes pending data, forces it to disk and closes the file; safe
		 * to call repeatedly.
		 *
		 * @throws UncheckedIOException if the pending data cannot be written
		 */
		@Override
		public void close() {
			if (outChannel == null) {
				return;
			}
			try {
				writeDisk();
				outChannel.force(true);
			} catch (IOException e) {
				throw new UncheckedIOException("cannot flush " + file_path, e);
			} finally {
				try {
					outChannel.close();
					randomAccessFile.close();
				} catch (IOException e) {
					System.err.println("failed to close " + file_path + ": " + e);
				}
				outChannel = null;
				array = null;
			}
		}
	}

	/**
	 * Cuts {@code src} into sorted run files {@code 0_0.txt}, {@code 0_1.txt},
	 * ... inside {@code workDir}, each holding at most {@link #CHUNK_LENGTH}
	 * integers.
	 *
	 * @param src     file to split, one integer per line
	 * @param workDir existing directory that receives the run files
	 */
	static void splitBigFile(String src, String workDir) {
		printFreeMemory();
		System.out.println("====开始分割文件====");
		System.err.println("====原始文件:" + new File(src).length() / 1024 / 1024 + "====");
		int loopTime = 0;
		String fileHerader = "0_";

		int[] temp = new int[CHUNK_LENGTH];
		int filled = 0;
		try (InputBuffer ib1 = new InputBuffer(BLOCK_SIZE, src)) {
			Integer read;
			while ((read = ib1.read()) != null) {
				temp[filled++] = read;
				if (filled == temp.length) {
					writeSortedChunk(temp, filled, new File(workDir, fileHerader + loopTime + ".txt"));
					++loopTime;
					filled = 0;
				}
			}
		}

		// Remaining values that did not fill a whole chunk.
		if (filled != 0) {
			writeSortedChunk(temp, filled, new File(workDir, fileHerader + loopTime + ".txt"));
		}

		System.out.println("====分割文件结束====");
		printFreeMemory();

	}

	/** Sorts the first {@code count} values in place and writes them to {@code file}. */
	private static void writeSortedChunk(int[] values, int count, File file) {
		sort(values, 0, count - 1);
		try (OutputBuffer ob = new OutputBuffer(BLOCK_SIZE, file.getAbsolutePath())) {
			for (int k = 0; k < count; k++) {
				ob.write(values[k]);
			}
		}
		System.out.println("====分割文件:" + file.getName() + " ====");
		printFreeMemory();
	}

	/**
	 * Sorts {@code a[low..hight]} (both bounds inclusive) in ascending order.
	 * Ranges with fewer than two elements are left untouched.
	 *
	 * @param a     array to sort in place
	 * @param low   index of the first element of the range
	 * @param hight index of the last element of the range
	 */
	public static void sort(int a[], int low, int hight) {
		if (low >= hight) {
			return;
		}
		// The library sort has no deep recursion and no quadratic worst case
		// on already-ordered input, unlike a first-element-pivot quicksort.
		Arrays.sort(a, low, hight + 1);
	}

	/** Prints the JVM's currently free heap memory in megabytes. */
	static void printFreeMemory() {
		System.out.println(" free memory:" + Runtime.getRuntime().freeMemory() / 1024 / 1024 + " mb");
	}

	/**
	 * Reads every integer of a file and prints how many there are.
	 *
	 * @param filePath     file to read, one integer per line
	 * @param showDetailed when {@code true}, also prints every value
	 */
	static void printFile(String filePath, boolean showDetailed) {
		int i = 0;
		try (InputBuffer ib = new InputBuffer(BLOCK_SIZE * 3, filePath)) {
			Integer read;
			while ((read = ib.read()) != null) {
				if (showDetailed) {
					System.out.println(read);
				}
				i++;
			}
		}
		System.out.println("共:" + i + "行");
	}

}

 

转载于:https://my.oschina.net/u/2403310/blog/1797363

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值