import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
 * External merge sort demo: sorts a text file of integers (one per line) while
 * keeping only a small, bounded number of them in memory at a time.
 *
 * <p>{@link #main(String[])} runs three steps:
 * <ol>
 * <li>{@code createBigsortNums} writes {@link #BIG_NUM_LINE} random integers
 * to {@link #ORING_FILE_PATH};</li>
 * <li>{@code beSmallFileAndSort} cuts that file into chunks of at most
 * {@link #SMALL_FILE_LINE} numbers, sorts every chunk in memory and stores it
 * as a temporary file below {@link #SMALL_FILE_PATH};</li>
 * <li>{@code unitFileToSort} merges the chunk files one after another (first
 * with second, the result with the third, and so on) into
 * {@code sortResult.txt}.</li>
 * </ol>
 *
 * <p>All files are written without a trailing line separator.
 *
 * <p>Run configuration: -Xms5m -Xmx5m
 */
public class BigDataSort {
    /** Directory (with trailing separator) that holds every file this class touches. */
    public final static String SMALL_FILE_PATH = "C://BigData//";
    /** Number of random integers written to the unsorted input file. */
    public final static int BIG_NUM_LINE = 200;
    /** Path of the unsorted input file. */
    public final static String ORING_FILE_PATH = "C://BigData//bigData.txt";
    /**
     * Maximum number of integers held in memory (and stored per chunk file).
     * The original author's note here reads "1M for 1 small file".
     */
    public final static int SMALL_FILE_LINE = 10;

    /** Sorted chunk files produced by {@code beSmallFileAndSort}; null until that step ran. */
    private File tempFiles[];

    /**
     * Writes {@link #BIG_NUM_LINE} random integers in the range
     * [0, 100000000) to {@link #ORING_FILE_PATH}, one per line.
     *
     * @throws IOException if the file cannot be created or written
     */
    private void createBigsortNums() throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(ORING_FILE_PATH));
        try {
            Random random = new Random();
            for (int i = 0; i < BIG_NUM_LINE; i++) {
                // Separator goes before every value but the first, so the
                // file never ends with an empty line.
                if (i > 0) {
                    writer.newLine();
                }
                writer.write(String.valueOf(random.nextInt(100000000)));
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Splits the input file into sorted chunk files of at most
     * {@link #SMALL_FILE_LINE} numbers each and records them in
     * {@code tempFiles}.
     *
     * <p>The input is read until end of file, so a line count that is not a
     * multiple of the chunk size yields one shorter final chunk instead of
     * losing the remainder.
     *
     * @throws IOException if the input cannot be read or a chunk cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    private void beSmallFileAndSort() throws IOException {
        List<File> chunkFiles = new ArrayList<File>();
        BufferedReader bigDataFile = new BufferedReader(new FileReader(ORING_FILE_PATH));
        try {
            List<Integer> chunk = new ArrayList<Integer>(SMALL_FILE_LINE);
            Integer number;
            while ((number = readNumber(bigDataFile)) != null) {
                chunk.add(number);
                if (chunk.size() == SMALL_FILE_LINE) {
                    chunkFiles.add(writeSortedChunk(chunk, chunkFiles.size()));
                    chunk.clear();
                }
            }
            if (!chunk.isEmpty()) {
                chunkFiles.add(writeSortedChunk(chunk, chunkFiles.size()));
            }
        } finally {
            bigDataFile.close();
        }
        tempFiles = chunkFiles.toArray(new File[chunkFiles.size()]);
    }

    /**
     * Merges all chunk files into {@code sortResult.txt} inside
     * {@link #SMALL_FILE_PATH}, replacing an existing result file.
     *
     * <p>The merge is sequential: the running result is merged with the next
     * chunk file until none is left. With no chunk files at all an empty
     * result file is written.
     *
     * @throws IOException if merging fails or the final file cannot be renamed
     * @throws IllegalStateException if the chunk files have not been created yet
     */
    private void unitFileToSort() throws IOException {
        if (tempFiles == null) {
            throw new IllegalStateException("beSmallFileAndSort() must run before unitFileToSort()");
        }
        File result = new File(SMALL_FILE_PATH + "sortResult.txt");
        if (tempFiles.length == 0) {
            // Nothing to merge: the sorted form of an empty input is an empty file.
            new FileWriter(result).close();
            return;
        }
        File merged = tempFiles[0];
        for (int i = 1; i < tempFiles.length; i++) {
            File next = sortBySmallFile(merged, tempFiles[i], i);
            // Best effort: a left-over intermediate file does not affect the result.
            merged.delete();
            merged = next;
        }
        if (result.exists()) {
            result.delete();
        }
        if (!merged.renameTo(result)) {
            throw new IOException("Could not rename " + merged.getPath() + " to " + result.getPath());
        }
    }

    /**
     * Manual check of the merge routine: merges {@code 1.txt} and
     * {@code 2.txt} from {@link #SMALL_FILE_PATH} into {@code testunit.txt}
     * and prints the merged lines to standard output.
     *
     * @throws IOException if one of the files cannot be read or written
     */
    public static void testSort() throws IOException {
        File merged = new File(SMALL_FILE_PATH + "testunit.txt");
        mergeFiles(new File(SMALL_FILE_PATH + "1.txt"), new File(SMALL_FILE_PATH + "2.txt"), merged);
        BufferedReader reader = new BufferedReader(new FileReader(merged));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Merges two ascending sorted files into a new file named
     * {@code <i>unit.txt} inside {@link #SMALL_FILE_PATH}.
     *
     * <p>The path of {@code fromFile} is printed to standard output. After the
     * merge {@code toFile} is deleted; {@code fromFile} is left in place for
     * the caller to remove. Either input may be empty.
     *
     * @param fromFile first sorted input, one integer per line
     * @param toFile second sorted input, one integer per line; deleted afterwards
     * @param i number used to build the name of the output file
     * @return the file holding the merged, sorted numbers
     * @throws IOException if an input cannot be read or the output cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    public static File sortBySmallFile(File fromFile, File toFile, int i) throws IOException {
        File newSortFile = new File(SMALL_FILE_PATH + i + "unit.txt");
        System.out.println(fromFile.getPath());
        mergeFiles(fromFile, toFile, newSortFile);
        toFile.delete();
        return newSortFile;
    }

    /**
     * Runs the complete demo: generate the input, split it into sorted chunks
     * and merge them.
     *
     * @param args ignored
     * @throws IOException if any file operation fails
     */
    public static void main(String[] args) throws IOException {
        BigDataSort bds = new BigDataSort();
        bds.createBigsortNums();
        bds.beSmallFileAndSort();
        bds.unitFileToSort();
    }

    /** Sorts {@code chunk} in place and writes it to the chunk file with the given index. */
    private static File writeSortedChunk(List<Integer> chunk, int index) throws IOException {
        Collections.sort(chunk);
        File chunkFile = new File(SMALL_FILE_PATH + "sortTempFile" + index + ".txt");
        BufferedWriter writer = new BufferedWriter(new FileWriter(chunkFile));
        try {
            for (int k = 0; k < chunk.size(); k++) {
                if (k > 0) {
                    writer.newLine();
                }
                writer.write(String.valueOf(chunk.get(k)));
            }
        } finally {
            writer.close();
        }
        return chunkFile;
    }

    /** Opens both sorted inputs and the target, merges them and closes all three streams. */
    private static void mergeFiles(File left, File right, File target) throws IOException {
        BufferedReader leftReader = new BufferedReader(new FileReader(left));
        try {
            BufferedReader rightReader = new BufferedReader(new FileReader(right));
            try {
                BufferedWriter writer = new BufferedWriter(new FileWriter(target));
                try {
                    mergeSorted(leftReader, rightReader, writer);
                } finally {
                    writer.close();
                }
            } finally {
                rightReader.close();
            }
        } finally {
            leftReader.close();
        }
    }

    /** Two-way merge of ascending integer streams; on equal values the left one is written first. */
    private static void mergeSorted(BufferedReader left, BufferedReader right, BufferedWriter out)
            throws IOException {
        Integer leftValue = readNumber(left);
        Integer rightValue = readNumber(right);
        boolean first = true;
        while (leftValue != null || rightValue != null) {
            int next;
            boolean takeLeft = rightValue == null
                    || (leftValue != null && leftValue.intValue() <= rightValue.intValue());
            if (takeLeft) {
                next = leftValue.intValue();
                leftValue = readNumber(left);
            } else {
                next = rightValue.intValue();
                rightValue = readNumber(right);
            }
            // Separator before every value but the first keeps the last line unterminated.
            if (!first) {
                out.newLine();
            }
            out.write(String.valueOf(next));
            first = false;
        }
    }

    /** Returns the next integer from {@code reader}, skipping blank lines, or null at end of input. */
    private static Integer readNumber(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String text = line.trim();
            if (text.length() > 0) {
                return Integer.valueOf(text);
            }
        }
        return null;
    }
}
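// Author's note on sortBySmallFile: the idea is simple, although the code is not written well and I could not think of a cleaner version. The first file is merged with the second in sorted order, the merged file is then merged with the third, and so on.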
// Article title: Sorting big data with little memory
// Latest recommended article published on 2024-06-15 10:59:05