多路归并 java_纯Java实现的多路归并快速排序算法

[代码]

纯Java实现的多路归并快速排序算法

package wjw.PreTrans;

import java.io.*;

import java.util.*;

import org.apache.commons.io.*;

public class MergeSort {

private static FastQSortAlgorithm QQ = new FastQSortAlgorithm();

private MergeSort() {

}

/**

* 得到指定文件的行数

* @param fileC String

* @return int

* @throws IOException

*/

private static int getFileLineSize(String fileC) throws IOException {

Reader fC = null;

try {

fC = new FileReader(fileC);

LineIterator it = IOUtils.lineIterator(fC);

int lineSize = 0;

while(it.hasNext()) {

it.nextLine();

lineSize++;

}

return lineSize;

} finally {

IOUtils.closeQuietly(fC);

}

}

/**

* 得到下一行的内容,如果已到文件末尾返回NULL

* @param iterator LineIterator

* @return String

*/

private static String nextLine(LineIterator iterator) {

if(iterator.hasNext()) {

return iterator.nextLine();

} else {

return null;

}

}

/**

* 读指定行数的字符串到缓冲区列表里

* @param iterator LineIterator

* @param bufList List

* @param lines int

*/

private static void readLines(LineIterator iterator, List bufList, int lines) {

for(int i = 0; i < lines; i++) {

if(!iterator.hasNext()) {

break;

}

bufList.add(iterator.nextLine());

}

}

/**

* 扫描fileC中的归并段并把它们交替分别送到文件fileA和fileB中,

* 本次归并段的大小为k*blockSize

* @param fileA String

* @param fileB String

* @param fileC String

* @param k int

* @param blockSize int

*/

private static void split(String fileA, String fileB, String fileC, int k,

int blockSize) throws FileNotFoundException, IOException {

boolean useA = true;

int i = 0;

List bufList = new ArrayList(blockSize); //大小为blockSize的缓冲区

Writer fA = null;

Writer fB = null;

Reader fC = null;

try {

fA = new BufferedWriter(new FileWriter(fileA));

fB = new BufferedWriter(new FileWriter(fileB));

fC = new FileReader(fileC);

LineIterator itC = IOUtils.lineIterator(fC);

while(itC.hasNext()) {

//->读入数据块

bufList.clear();

readLines(itC, bufList, blockSize);

//

if(useA) {

IOUtils.writeLines(bufList, "\n", fA);

} else {

IOUtils.writeLines(bufList, "\n", fB);

}

if(++i == k) {

i = 0;

useA = !useA;

}

}

} finally {

bufList.clear();

IOUtils.closeQuietly(fA);

IOUtils.closeQuietly(fB);

IOUtils.closeQuietly(fC);

}

}

/**

* n为当前归并段大小(k*blockSize);将文件fX的后续归并段拷入到fY,变量currRunPos为当前归并段的索引

* @param fileX String

* @param fileY String

* @param currRunPos int

* @param n int

* @return int 当前归并段的索引

*/

private static int copyTail(LineIterator fileX, Writer fileY, int currRunPos,

int n) throws IOException {

//从当前位置到归并段结束,拷贝每个数据

while(currRunPos <= n) {

//若没有更多的数据项,则文件结束且归并段结束

if(!fileX.hasNext()) {

break;

}

//修改当前归并段位置并将数据项写入fY

currRunPos++;

IOUtils.write(fileX.nextLine() + "\n", fileY);

}

return currRunPos;

}

/**

* 将文件fA和fB中长度为n(k*blockSize)的归并段合并回fC中

* @param fileA String

* @param fileB String

* @param fileC String

* @param n int

* @throws IOException

*/

private static void merge(String fileA, String fileB, String fileC, int n) throws

IOException {

//currA和currB表示在当前归并段中的位置

int currA = 1;

int currB = 1;

//分别从fA和fB中读出的数据项

String dataA, dataB;

Reader fA = null;

Reader fB = null;

Writer fC = null;

try {

fA = new FileReader(fileA);

fB = new FileReader(fileB);

fC = new BufferedWriter(new FileWriter(fileC));

LineIterator itA = IOUtils.lineIterator(fA);

LineIterator itB = IOUtils.lineIterator(fB);

dataA = nextLine(itA);

dataB = nextLine(itB);

for(; ; ) {

//若(dataA<=dataB),则将dataA拷贝到fC并修改当前归并段的位置

if(dataA.compareTo(dataB) <= 0) {

IOUtils.write(dataA + "\n", fC);

//从fA中取下一归并段,若不存在,则已到文件尾,应将fB的后续归并段拷入到fC;

//若当前位置>n,则已将所有fA的归并段拷完,应拷贝fB的后续归并段

dataA = nextLine(itA);

currA++;

if(dataA == null || currA > n) {

IOUtils.write(dataB + "\n", fC);

currB++;

currB = copyTail(itB, fC, currB, n);

//fA的大小>=fB的大小;若在fA的文件尾,则结束

if(dataA == null) {

break;

} else { //否则,应在新的归并段中,重置当前位置

currA = 1;

}

//取fB中的下一项.若不存在,则只有fA中剩余的部分要拷贝到fC,

//退出循环前将当前归并段写入fC

dataB = nextLine(itB);

if(dataB == null) {

IOUtils.write(dataA + "\n", fC);

currA = 2;

break;

} else { //否则,重置fB中当前归并段

currB = 1;

}

}

} else { //否则(dataA>dataB)

IOUtils.write(dataB + "\n", fC);

//从fB中取下一归并段,若不存在,则已到文件尾,应将fA的后续归并段拷入到fC;

//若当前位置>n,则已将所有fB的归并段拷完,应拷贝fA的后续归并段

dataB = nextLine(itB);

currB++;

if(dataB == null || currB > n) {

IOUtils.write(dataA + "\n", fC);

currA++;

currA = copyTail(itA, fC, currA, n);

//若fB中没有更多项,则置fA的当前位置,准备拷贝fA中的最后归并段到fC中

if(dataB == null) {

currA = 1;

break;

} else { //否则,置fB的当前位置,并从fA中读入数据

currB = 1;

if((dataA = nextLine(itA)) == null) {

break;

} else {

currA = 1;

}

}

}

}

} //

//将fA中可能存在的剩余的归并段写入fC中(注:fA的长度时>=fB的)

if(dataA != null && dataB == null) {

currA = copyTail(itA, fC, currA, n);

}

} finally {

IOUtils.closeQuietly(fA);

IOUtils.closeQuietly(fB);

IOUtils.closeQuietly(fC);

}

}

/**

* 用指定的blockSize块大小,排序指定的文件fileC

* @param fileC String

* @param blockSize int

* @throws IOException

*/

/**

* 用指定的blockSize块大小,排序指定的文件fileSource,排序后的文件是fileOut

* @param fileSource String

* @param fileOut String

* @param blockSize int

* @param removeDuple

* @throws IOException

*/

public static void sort(String fileSource,String fileOut, int blockSize,boolean removeDuple) throws IOException {

String fileA = File.createTempFile("wjw", null).getAbsolutePath();

String fileB = File.createTempFile("wjw", null).getAbsolutePath();

int mergeIndex = 1;

int lineSize = getFileLineSize(fileSource);

int k = 1;

int n = k * blockSize;

boolean useA = true;

List list = new ArrayList(blockSize);

Writer fA = null;

Writer fB = null;

Reader fC = null;

try {

fA = new BufferedWriter(new FileWriter(fileA));

fB = new BufferedWriter(new FileWriter(fileB));

fC = new FileReader(fileSource);

LineIterator itC = IOUtils.lineIterator(fC);

if(lineSize <= blockSize) { //对于小文件,从fC读入数据,直接排序后写回文件中

readLines(itC, list, lineSize);

Collections.sort(list);

IOUtils.closeQuietly(fC);

FileUtils.writeLines(new File(fileOut), "GBK", list, "\n");

list.clear();

if(removeDuple) {

removeDuple(fileOut);

}

return;

}

//->第一次分割,合并

System.out.println("第:"+mergeIndex+"分割,合并");

while(itC.hasNext()) {

list.clear();

readLines(itC, list, blockSize);

Collections.sort(list);

if(useA) {

IOUtils.writeLines(list, "\n", fA);

} else {

IOUtils.writeLines(list, "\n", fB);

}

useA = !useA;

}

list.clear();

IOUtils.closeQuietly(fA);

IOUtils.closeQuietly(fB);

IOUtils.closeQuietly(fC);

merge(fileA, fileB, fileOut, blockSize);

mergeIndex++;

//

//->将当前归并段大小加倍,循环进行

k = k * 2;

n = k * blockSize;

while(n < lineSize) { //当n>=文件大小时,fC仅剩一个已排好序的归并段

System.out.println("第:"+mergeIndex+"分割,合并");

split(fileA, fileB, fileOut, k, blockSize);

merge(fileA, fileB, fileOut, n);

mergeIndex++;

k = k * 2;

n = k * blockSize;

}

//->将当前归并段大小加倍,循环进行

} finally {

IOUtils.closeQuietly(fA);

IOUtils.closeQuietly(fB);

IOUtils.closeQuietly(fC);

(new File(fileA)).delete();

(new File(fileB)).delete();

}

if(removeDuple) {

removeDuple(fileOut);

}

}

/**

* 删除已经排好序的文件中重复的数据

* @param fileC String

* @throws IOException

*/

private static void removeDuple(String fileC) throws IOException {

System.out.println("去重");

Reader fC = null;

Writer fTemp = null;

File tempFile = File.createTempFile("wjw", null);

try {

fC = new FileReader(fileC);

fTemp = new BufferedWriter(new FileWriter(tempFile));

String tmpA = "";

String tmpB = "";

LineIterator itC = IOUtils.lineIterator(fC);

while(itC.hasNext()) {

tmpB = itC.nextLine();

if(tmpB.compareTo(tmpA) != 0) {

IOUtils.write(tmpB + "\n", fTemp);

tmpA = tmpB;

}

}

} finally {

IOUtils.closeQuietly(fTemp);

IOUtils.closeQuietly(fC);

}

File cFile = new File(fileC);

if(cFile.delete()) {

if(tempFile.renameTo(cFile)) {

tempFile.delete();

}

}

}

public static String formatSecond(long seconds) {

long h = seconds /(60*60);

StringBuffer sb = new StringBuffer();

sb.append(h+"小时");

seconds = seconds%(60*60);

long c = seconds /60;

sb.append(c+"分");

sb.append(seconds %60+"秒");

return sb.toString();

}

public static void main(String args[]) {

try {

String fileName = "d:/ESort.txt";

int blockSize = 100000;

long c1 = System.currentTimeMillis();

MergeSort.sort(fileName,fileName + "_SORT", blockSize,true);

long c2 = (System.currentTimeMillis() - c1) / 1000;

System.out.println("耗时:"+formatSecond(c2));

} catch(IOException ex) {

ex.printStackTrace();

}

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值