多路归并(Java):纯Java实现的多路归并快速排序算法

package wjw.PreTrans;

import java.io.*;

import java.util.*;

import org.apache.commons.io.*;

public class MergeSort {

// NOTE(review): QQ is not referenced anywhere else in the visible class, and
// FastQSortAlgorithm is neither declared nor explicitly imported in this file
// (presumably a same-package class — confirm). Candidate for removal.
private static

FastQSortAlgorithm QQ = new

FastQSortAlgorithm();

/** Private constructor: every method in this class is static, so it is never instantiated. */
private MergeSort() {

}

/**
 * Counts the lines of a text file by walking over it once.
 *
 * @param fileC path of the file to scan
 * @return the number of lines produced by the line iterator
 * @throws IOException if the file cannot be opened
 */
private static int getFileLineSize(String fileC) throws IOException {
    Reader source = null;
    try {
        source = new FileReader(fileC);
        int count = 0;
        for (LineIterator lines = IOUtils.lineIterator(source); lines.hasNext(); count++) {
            // The line content is irrelevant here; only the count matters.
            lines.nextLine();
        }
        return count;
    } finally {
        IOUtils.closeQuietly(source);
    }
}

/**
 * Fetches the next line from the iterator.
 *
 * @param iterator line source
 * @return the next line, or {@code null} once the iterator is exhausted
 */
private static String nextLine(LineIterator iterator) {
    return iterator.hasNext() ? iterator.nextLine() : null;
}

/**
 * Appends up to {@code lines} lines from the iterator to {@code bufList},
 * stopping early when the iterator runs out.
 *
 * @param iterator line source
 * @param bufList  list that receives the lines (existing content is kept)
 * @param lines    maximum number of lines to read
 */
private static void readLines(LineIterator iterator, List bufList, int lines) {
    int remaining = lines;
    while (remaining > 0 && iterator.hasNext()) {
        bufList.add(iterator.nextLine());
        remaining--;
    }
}

/**
 * Distributes the lines of {@code fileC} alternately over {@code fileA} and
 * {@code fileB}: the first {@code k} blocks of {@code blockSize} lines go to
 * {@code fileA}, the next {@code k} blocks to {@code fileB}, and so on.
 * Lines are copied in their original order, each terminated by "\n".
 *
 * @param fileA     path of the first output file (overwritten)
 * @param fileB     path of the second output file (overwritten)
 * @param fileC     path of the input file
 * @param k         number of consecutive blocks written before switching target
 * @param blockSize number of lines per block; must be positive
 * @throws FileNotFoundException    if a file cannot be opened
 * @throws IOException              if reading or writing fails, including the final flush
 * @throws IllegalArgumentException if {@code blockSize} is not positive
 */
private static void split(String fileA, String fileB, String fileC, int k, int blockSize)
        throws FileNotFoundException, IOException {
    // A zero block size would read nothing per round and never finish.
    if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
    }

    // Buffer holding one block of blockSize lines.
    List bufList = new ArrayList(blockSize);
    Writer fA = null;
    Writer fB = null;
    Reader fC = null;
    try {
        fA = new BufferedWriter(new FileWriter(fileA));
        fB = new BufferedWriter(new FileWriter(fileB));
        fC = new FileReader(fileC);
        LineIterator itC = IOUtils.lineIterator(fC);

        Writer target = fA;
        int blocksWritten = 0;
        while (itC.hasNext()) {
            // Read the next block and append it to the current target.
            bufList.clear();
            readLines(itC, bufList, blockSize);
            IOUtils.writeLines(bufList, "\n", target);

            // After k blocks, switch to the other file.
            if (++blocksWritten == k) {
                blocksWritten = 0;
                target = (target == fA) ? fB : fA;
            }
        }

        // Close explicitly so that a failed flush is reported instead of being
        // swallowed by closeQuietly, which would leave truncated run files.
        fA.close();
        fB.close();
    } finally {
        IOUtils.closeQuietly(fA);
        IOUtils.closeQuietly(fB);
        IOUtils.closeQuietly(fC);
    }
}

/**
 * Copies the remainder of the current run from {@code fileX} to {@code fileY}.
 * Lines are copied while the run position is at most {@code n} and the
 * iterator still has data; each copied line is terminated by "\n".
 *
 * @param fileX      line source
 * @param fileY      destination writer
 * @param currRunPos position within the current run before copying
 * @param n          run length
 * @return the run position after copying
 * @throws IOException if writing fails
 */
private static int copyTail(LineIterator fileX, Writer fileY, int currRunPos, int n)
        throws IOException {
    int position = currRunPos;
    // Stop at the end of the run, or earlier if the file ends inside it.
    while (position <= n && fileX.hasNext()) {
        position++;
        IOUtils.write(fileX.nextLine() + "\n", fileY);
    }
    return position;
}

/**
 * Merges the sorted runs of {@code fileA} and {@code fileB} pairwise into
 * {@code fileC}. Each input is treated as a sequence of runs of at most
 * {@code n} lines; the i-th run of A is merged with the i-th run of B using
 * {@link String#compareTo(String)} ordering. Every output line ends with "\n".
 *
 * <p>The loop relies on {@code fileA} holding at least as many runs as
 * {@code fileB}, which is how the callers in this class fill the two files
 * (A always receives the first run).
 *
 * <p>If one input has no lines at all, the other one is copied unchanged
 * (previously this case failed with a {@code NullPointerException}).
 *
 * @param fileA path of the first input file
 * @param fileB path of the second input file
 * @param fileC path of the output file (overwritten)
 * @param n     run length in lines
 * @throws IOException if reading or writing fails, including the final flush
 */
private static void merge(String fileA, String fileB, String fileC, int n) throws IOException {
    // currA and currB are the positions inside the current run of each file.
    int currA = 1;
    int currB = 1;
    // The items most recently read from fA and fB.
    String dataA;
    String dataB;
    Reader fA = null;
    Reader fB = null;
    Writer fC = null;
    try {
        fA = new FileReader(fileA);
        fB = new FileReader(fileB);
        fC = new BufferedWriter(new FileWriter(fileC));
        LineIterator itA = IOUtils.lineIterator(fA);
        LineIterator itB = IOUtils.lineIterator(fB);
        dataA = nextLine(itA);
        dataB = nextLine(itB);

        if (dataA == null || dataB == null) {
            // Nothing to merge against: copy whichever side has data as-is.
            String pending = (dataA != null) ? dataA : dataB;
            LineIterator rest = (dataA != null) ? itA : itB;
            while (pending != null) {
                IOUtils.write(pending + "\n", fC);
                pending = nextLine(rest);
            }
        } else {
            for (;;) {
                if (dataA.compareTo(dataB) <= 0) {
                    // dataA <= dataB: emit dataA and advance within A's run.
                    IOUtils.write(dataA + "\n", fC);
                    dataA = nextLine(itA);
                    currA++;
                    // A is at end of file, or its current run is used up:
                    // flush the rest of B's current run.
                    if (dataA == null || currA > n) {
                        IOUtils.write(dataB + "\n", fC);
                        currB++;
                        currB = copyTail(itB, fC, currB, n);
                        // A holds at least as many runs as B, so end of A ends the merge.
                        if (dataA == null) {
                            break;
                        } else {
                            // Otherwise dataA is the first item of A's next run.
                            currA = 1;
                        }
                        // Fetch the first item of B's next run. If there is none,
                        // only A's remaining run is left; emit dataA here and let
                        // the code after the loop copy the rest of that run.
                        dataB = nextLine(itB);
                        if (dataB == null) {
                            IOUtils.write(dataA + "\n", fC);
                            currA = 2;
                            break;
                        } else {
                            // Otherwise start B's next run.
                            currB = 1;
                        }
                    }
                } else {
                    // dataA > dataB: emit dataB and advance within B's run.
                    IOUtils.write(dataB + "\n", fC);
                    dataB = nextLine(itB);
                    currB++;
                    // B is at end of file, or its current run is used up:
                    // flush the rest of A's current run.
                    if (dataB == null || currB > n) {
                        IOUtils.write(dataA + "\n", fC);
                        currA++;
                        currA = copyTail(itA, fC, currA, n);
                        if (dataB == null) {
                            // B is exhausted: reset A's position so the code after
                            // the loop copies A's last run, if any.
                            currA = 1;
                            break;
                        } else {
                            // Otherwise start B's next run and read A's next item.
                            currB = 1;
                            if ((dataA = nextLine(itA)) == null) {
                                break;
                            } else {
                                currA = 1;
                            }
                        }
                    }
                }
            }

            // Copy a run that may remain in A (A is at least as long as B).
            if (dataA != null && dataB == null) {
                currA = copyTail(itA, fC, currA, n);
            }
        }

        // Close explicitly so that a failed flush is reported instead of being
        // swallowed by closeQuietly, which would leave a truncated output file.
        fC.close();
    } finally {
        IOUtils.closeQuietly(fA);
        IOUtils.closeQuietly(fB);
        IOUtils.closeQuietly(fC);
    }
}

/**
 * Sorts the lines of a text file in {@link String#compareTo(String)} order and
 * writes them, each terminated by "\n", to {@code fileOut}.
 *
 * <p>Files with at most {@code blockSize} lines are sorted in memory. Larger
 * files are sorted externally: sorted blocks of {@code blockSize} lines are
 * distributed over two temporary files and merged, then the run length is
 * doubled and the split/merge pass repeated until a single run remains.
 *
 * <p>All reading and writing uses the platform default charset. (The in-memory
 * path used to write with a hard-coded "GBK" charset while reading with the
 * platform default, which corrupted non-ASCII text wherever the default is not
 * GBK and disagreed with the external path.)
 *
 * @param fileSource  path of the input file
 * @param fileOut     path of the output file (overwritten)
 * @param blockSize   maximum number of lines held in memory at once
 * @param removeDuple if {@code true}, consecutive equal lines of the sorted
 *                    output are collapsed into one
 * @throws IOException              if reading or writing fails
 * @throws IllegalArgumentException if the file has more than {@code blockSize}
 *                                  lines and {@code blockSize} is not positive
 */
public static void sort(String fileSource, String fileOut, int blockSize, boolean removeDuple)
        throws IOException {
    int lineSize = getFileLineSize(fileSource);
    if (lineSize <= blockSize) {
        sortInMemory(fileSource, fileOut, lineSize);
    } else {
        sortExternally(fileSource, fileOut, blockSize, lineSize);
    }
    if (removeDuple) {
        removeDuple(fileOut);
    }
}

/** Small-file path: reads every line, sorts the list and writes it to fileOut. */
private static void sortInMemory(String fileSource, String fileOut, int lineSize)
        throws IOException {
    List lines = new ArrayList(lineSize);
    Reader in = null;
    try {
        in = new FileReader(fileSource);
        readLines(IOUtils.lineIterator(in), lines, lineSize);
    } finally {
        // Closed before writing so that fileOut may be the same file as fileSource.
        IOUtils.closeQuietly(in);
    }
    Collections.sort(lines);

    Writer out = null;
    try {
        out = new BufferedWriter(new FileWriter(fileOut));
        IOUtils.writeLines(lines, "\n", out);
        // Explicit close: a failed flush must not be silently ignored.
        out.close();
    } finally {
        IOUtils.closeQuietly(out);
    }
}

/** Large-file path: two-way external merge sort through two temporary files. */
private static void sortExternally(String fileSource, String fileOut, int blockSize,
        int lineSize) throws IOException {
    // A zero block size would read nothing per round and never finish.
    if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be positive: " + blockSize);
    }
    File tempA = null;
    File tempB = null;
    try {
        tempA = File.createTempFile("wjw", null);
        tempB = File.createTempFile("wjw", null);
        String fileA = tempA.getAbsolutePath();
        String fileB = tempB.getAbsolutePath();

        // First pass: sorted blocks of blockSize lines, merged pairwise.
        int mergeIndex = 1;
        System.out.println("第:" + mergeIndex + "分割,合并");
        writeSortedBlocks(fileSource, fileA, fileB, blockSize);
        merge(fileA, fileB, fileOut, blockSize);
        mergeIndex++;

        // Double the run length each pass. Once n >= lineSize the output holds
        // a single sorted run. long arithmetic keeps k * blockSize from
        // overflowing; inside the loop n < lineSize, so the int casts are safe.
        long k = 2;
        long n = k * blockSize;
        while (n < lineSize) {
            System.out.println("第:" + mergeIndex + "分割,合并");
            split(fileA, fileB, fileOut, (int) k, blockSize);
            merge(fileA, fileB, fileOut, (int) n);
            mergeIndex++;
            k = k * 2;
            n = k * blockSize;
        }
    } finally {
        if (tempA != null) {
            tempA.delete();
        }
        if (tempB != null) {
            tempB.delete();
        }
    }
}

/** Reads fileSource in blocks of blockSize lines, sorts each block and writes the blocks alternately to fileA and fileB. */
private static void writeSortedBlocks(String fileSource, String fileA, String fileB,
        int blockSize) throws IOException {
    List list = new ArrayList(blockSize);
    Writer fA = null;
    Writer fB = null;
    Reader fC = null;
    try {
        fA = new BufferedWriter(new FileWriter(fileA));
        fB = new BufferedWriter(new FileWriter(fileB));
        fC = new FileReader(fileSource);
        LineIterator itC = IOUtils.lineIterator(fC);
        boolean useA = true;
        while (itC.hasNext()) {
            list.clear();
            readLines(itC, list, blockSize);
            Collections.sort(list);
            IOUtils.writeLines(list, "\n", useA ? fA : fB);
            useA = !useA;
        }
        // Explicit close: a failed flush must not be silently ignored.
        fA.close();
        fB.close();
    } finally {
        IOUtils.closeQuietly(fA);
        IOUtils.closeQuietly(fB);
        IOUtils.closeQuietly(fC);
    }
}

/**
 * Rewrites {@code fileC} so that each group of consecutive equal lines is
 * reduced to a single line. On a sorted file this removes all duplicates.
 *
 * <p>The result is built in a temporary file which then replaces
 * {@code fileC}. A rename is tried first; if the original cannot be deleted or
 * the rename fails (for example when the temp directory is on another volume),
 * the temporary file is copied over {@code fileC} instead, so the data is
 * never lost and a failure is reported as an {@code IOException}.
 *
 * <p>Empty lines are treated like any other line: one of them is kept.
 * (Comparing against an initial "" used to drop all empty lines.)
 *
 * @param fileC path of the file to de-duplicate in place
 * @throws IOException if reading, writing or replacing the file fails
 */
private static void removeDuple(String fileC) throws IOException {
    System.out.println("去重");
    File cFile = new File(fileC);
    File tempFile = File.createTempFile("wjw", null);
    try {
        Reader fC = null;
        Writer fTemp = null;
        try {
            fC = new FileReader(fileC);
            fTemp = new BufferedWriter(new FileWriter(tempFile));
            // null means "no line written yet", so it can never equal a real line.
            String previous = null;
            LineIterator itC = IOUtils.lineIterator(fC);
            while (itC.hasNext()) {
                String line = itC.nextLine();
                if (!line.equals(previous)) {
                    IOUtils.write(line + "\n", fTemp);
                    previous = line;
                }
            }
            // Explicit close: the original must not be replaced by a file
            // whose final flush failed.
            fTemp.close();
        } finally {
            IOUtils.closeQuietly(fTemp);
            IOUtils.closeQuietly(fC);
        }

        // Fast path: delete + rename. Fall back to copying when either step fails.
        boolean renamed = cFile.delete() && tempFile.renameTo(cFile);
        if (!renamed) {
            FileUtils.copyFile(tempFile, cFile);
        }
    } finally {
        // No-op after a successful rename; otherwise removes the leftover temp file.
        tempFile.delete();
    }
}

public static String formatSecond(long

seconds) {

long h = seconds

/(60*60);

StringBuffer sb = new

StringBuffer();

sb.append(h+"小时");

seconds =

seconds%(60*60);

long c = seconds

/60;

sb.append(c+"分");

sb.append(seconds

`+"秒");

return

sb.toString();

}

/**
 * Command-line entry point: sorts a file with duplicate removal, writes the
 * result next to it with the suffix "_SORT" and prints the elapsed time.
 *
 * @param args optional: {@code args[0]} is the input file (default
 *             "d:/ESort.txt"), {@code args[1]} the block size in lines
 *             (default 100000)
 */
public static void main(String args[]) {
    try {
        // Defaults are the values that used to be hard-coded.
        String fileName = (args != null && args.length > 0) ? args[0] : "d:/ESort.txt";
        int blockSize = (args != null && args.length > 1) ? Integer.parseInt(args[1]) : 100000;

        long c1 = System.currentTimeMillis();
        MergeSort.sort(fileName, fileName + "_SORT", blockSize, true);
        long c2 = (System.currentTimeMillis() - c1) / 1000;
        System.out.println("耗时:" + formatSecond(c2));
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值