通过 hash 取模将大文件拆分成小文件的代码示例(该思路可用于大文件排序和求 TopN)

话不多说,直接上代码:

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.*;

/**
 * Sorts the integers stored in a large text file (one integer per line) when the
 * data cannot be loaded into memory at once.
 *
 * <p>The work is split into three steps: {@link #separateFile()} distributes the
 * numbers over a fixed number of part files by hash modulo,
 * {@link #everySingleFileSort()} sorts every part file in memory, and
 * {@link #mergeFile()} merges the sorted part files into one sorted output file.
 *
 * @author dyf
 */
public class LargeDataSortTest {
    static File file = new File("F:" + File.separator + "dataTest" + File.separator + "data.txt");
    static File file1 = new File("F:" + File.separator + "dataTest" + File.separator + "dataSorted.txt");

    /** Number of part files the input is split into. */
    private static final int FILE_COUNT = 20;

    public static void main(String[] args) throws Exception {
        // The generation, per-file sort and merge steps are disabled here, exactly as in
        // the original; uncomment them to run the whole pipeline.
//        createData();
        System.out.println("大文件写入成功");
        separateFile();
        System.out.println("文件拆分成功");

//        everySingleFileSort();
//        System.out.println("小文件排序完成");
//        mergeFile();
//        System.out.println("所有排序都已完成");

    }

    /**
     * Creates the large input file: 1,000,000 random non-negative integers, one per line.
     *
     * @throws IOException if the input file cannot be created or written
     */
    public static void createData() throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            Random random = new Random();
            for (int i = 0; i < 1000000; i++) {
                writer.write(random.nextInt(Integer.MAX_VALUE) + "\r\n");
            }
        }
    }

    /**
     * Splits the input file into {@code FILE_COUNT} part files by hash modulo.
     * Equal values have equal hashes, so duplicates always land in the same part file.
     * Blank lines are skipped.
     *
     * @throws IOException if the input cannot be read or a part file cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    public static void separateFile() throws IOException {
        List<BufferedWriter> writers = new ArrayList<BufferedWriter>(FILE_COUNT);
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            for (int i = 0; i < FILE_COUNT; i++) {
                writers.add(new BufferedWriter(new FileWriter(partPath(i))));
            }
            Integer number;
            while ((number = readNextInt(reader)) != null) {
                writers.get(bucketOf(number)).write(number + "\r\n");
            }
        } finally {
            // Closing also flushes the buffered data; runs even when splitting failed.
            closeAll(writers);
        }
    }

    /**
     * Sorts every part file in ascending order, rewriting each file in place.
     *
     * @throws Exception if a part file cannot be read or written
     */
    public static void everySingleFileSort() throws Exception {
        for (int i = 0; i < FILE_COUNT; i++) {
            String path = partPath(i);
            List<Integer> numbers = new ArrayList<Integer>();
            // The reader is closed before the same file is reopened for writing.
            try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
                Integer number;
                while ((number = readNextInt(reader)) != null) {
                    numbers.add(number);
                }
            }
            Collections.sort(numbers);
            numbersWrite(numbers, path);
        }
    }

    /**
     * Writes the given numbers to {@code path}, one per line, replacing the file's content.
     *
     * @param numbers the numbers to write, in the order they should appear
     * @param path    the file to (over)write
     * @throws IOException if the file cannot be written
     */
    public static void numbersWrite(List<Integer> numbers, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
            for (Integer number : numbers) {
                writer.write(number + "\r\n");
            }
        }
    }

    /**
     * Merges the sorted part files into the single sorted output file.
     *
     * <p>A priority queue holds the smallest unread value of every part file that still
     * has data. The minimum is written out and replaced by the next value from the
     * part file it came from.
     *
     * @throws Exception if a part file cannot be read or the output cannot be written
     */
    public static void mergeFile() throws Exception {
        PriorityQueue<Obj> heads = new PriorityQueue<Obj>(FILE_COUNT, new Obj());
        List<BufferedReader> readers = new ArrayList<BufferedReader>(FILE_COUNT);
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file1))) {
            // Seed the queue with the first value of every non-empty part file.
            for (int i = 0; i < FILE_COUNT; i++) {
                BufferedReader reader = new BufferedReader(new FileReader(partPath(i)));
                readers.add(reader);
                Integer first = readNextInt(reader);
                if (first != null) {
                    heads.add(new Obj(first, i));
                }
            }
            while (!heads.isEmpty()) {
                Obj smallest = heads.poll();
                out.write(smallest.a + "\r\n");
                // Refill from the part file the written value came from.
                Integer next = readNextInt(readers.get(smallest.b));
                if (next != null) {
                    heads.add(new Obj(next, smallest.b));
                }
            }
        } finally {
            closeAll(readers);
        }
    }

    /** Returns the path of the part file with the given index. */
    private static String partPath(int index) {
        return "F:" + File.separator + "dataTest" + File.separator + "data" + index + ".txt";
    }

    /** Maps a number to a part-file index in {@code [0, FILE_COUNT)}, also for negative hashes. */
    private static int bucketOf(Integer number) {
        int remainder = number.hashCode() % FILE_COUNT;
        return (remainder + FILE_COUNT) % FILE_COUNT;
    }

    /** Reads the next integer, skipping blank lines; returns {@code null} at end of input. */
    private static Integer readNextInt(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String text = line.trim();
            if (!text.isEmpty()) {
                return Integer.valueOf(text);
            }
        }
        return null;
    }

    /** Closes every resource, then rethrows the first close failure (later ones are suppressed). */
    private static void closeAll(List<? extends java.io.Closeable> resources) throws IOException {
        IOException firstFailure = null;
        for (java.io.Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}

/**
 * A value read from a part file together with the index of that part file.
 * An instance also serves as the comparator that orders entries by value.
 */
class Obj implements Comparator<Obj> {
    /** {@code a} is the value; {@code b} is the index of the part file it was read from. */
    int a, b;

    Obj() {
    }

    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /** Orders entries by ascending value; uses {@code Integer.compare} to avoid subtraction overflow. */
    @Override
    public int compare(Obj o1, Obj o2) {
        return Integer.compare(o1.a, o2.a);
    }
}

 

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值