在 4G 文件中找出出现频率最高的数

生成测试文件

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Random;

/**
 * Generates the test input: a text file of comma-terminated random integers.
 *
 * <p>Every record is a random integer in the range [0, 10000) followed by a
 * comma, i.e. at most five bytes. With the default record count the resulting
 * file is roughly 4 GB at most.
 */
public class WriteBigFile {

	/** Output file used when no path is given on the command line. */
	private static final String DEFAULT_PATH = "D:\\testFile\\test.txt";

	/** Number of records written by default (about 833 * 1024 * 1024). */
	private static final long DEFAULT_COUNT = 873809999L;

	/** Size of the in-memory write buffer: 50 MB. */
	private static final int BUFFER_SIZE = 1024 * 1024 * 50;

	/** Exclusive upper bound of the generated numbers. */
	private static final int BOUND = 10000;

	/**
	 * Writes the random numbers and prints the elapsed time in milliseconds.
	 *
	 * @param args optional overrides: {@code args[0]} is the output file path,
	 *             {@code args[1]} is the number of records to write; the
	 *             built-in defaults are used for anything that is missing
	 * @throws Exception if the file cannot be opened or written, or if
	 *                   {@code args[1]} is not a valid number
	 */
	public static void main(String[] args) throws Exception {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
		long count = (args != null && args.length > 1) ? Long.parseLong(args[1]) : DEFAULT_COUNT;

		long startTime = System.currentTimeMillis();
		File file = new File(path);
		Random random = new Random();
		// try-with-resources flushes and closes the streams even when a write fails.
		try (OutputStream out = new FileOutputStream(file);
				BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(out, BUFFER_SIZE)) {
			for (long i = 0; i < count; i++) {
				String record = random.nextInt(BOUND) + ",";
				// Explicit charset so the bytes do not depend on the platform default.
				bufferedOutputStream.write(record.getBytes(java.nio.charset.StandardCharsets.UTF_8));
			}
		}
		long endTime = System.currentTimeMillis();
		System.out.println(endTime - startTime);
	}
}

读文件

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Reads the big comma-separated number file and splits it into smaller
 * "child" files, one per bucket.
 *
 * <p>A number {@code n} goes to the file {@code child_<n % 1000>.txt}, so all
 * occurrences of the same number end up in the same child file. Numbers are
 * buffered per bucket in memory and appended to the child file in batches.
 *
 * <p>Child files are opened in append mode: delete child files left over from
 * a previous run before running again, otherwise their content is kept and
 * the new records are added after it.
 */
public class ReadBigFile {

	/** Input file used when no path is given on the command line. */
	private static final String DEFAULT_INPUT = "D:\\testFile\\test.txt";

	/** Directory for the child files when none is given on the command line. */
	private static final String DEFAULT_OUTPUT_DIR = "D:\\testFile";

	/** Number of buckets; a number's bucket is {@code number % BUCKET_COUNT}. */
	private static final int BUCKET_COUNT = 1000;

	/** A bucket's buffer is written out once it holds more than this many records. */
	private static final int FLUSH_THRESHOLD = 1024;

	/**
	 * Splits the input file into child files and prints the elapsed time in
	 * milliseconds.
	 *
	 * <p>The input is parsed character by character; each comma terminates one
	 * number. Characters after the last comma are ignored.
	 *
	 * @param args optional overrides: {@code args[0]} is the input file,
	 *             {@code args[1]} is the directory that receives the child
	 *             files; the built-in defaults are used for anything missing
	 * @throws Exception if a file cannot be read or written, or if a token
	 *                   between commas is not a valid integer
	 */
	public static void main(String[] args) throws Exception {
		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
		String filePath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_DIR;

		HashMap<Integer, List<String>> map = new HashMap<Integer, List<String>>();

		long start = System.currentTimeMillis();
		// try-with-resources closes the reader even when parsing or writing fails.
		try (BufferedReader bufferedReader = new BufferedReader(new FileReader(new File(inputPath)))) {
			StringBuilder token = new StringBuilder();
			int c;
			while ((c = bufferedReader.read()) != -1) {
				if (c != ',') {
					token.append((char) c);
					continue;
				}
				int num = Integer.parseInt(token.toString());
				token.setLength(0);

				int remainder = num % BUCKET_COUNT;
				List<String> list = map.get(remainder);
				if (list == null) {
					list = new ArrayList<String>(FLUSH_THRESHOLD + 1);
					map.put(remainder, list);
				}
				list.add(num + ",");
				if (list.size() > FLUSH_THRESHOLD) {
					writeChildFiles(childPath(filePath, remainder), list);
				}
			}
		}

		// Write out whatever is still buffered; without this step the last
		// (up to FLUSH_THRESHOLD) records of every bucket would be lost.
		for (Integer remainder : map.keySet()) {
			List<String> list = map.get(remainder);
			if (!list.isEmpty()) {
				writeChildFiles(childPath(filePath, remainder), list);
			}
		}

		long end = System.currentTimeMillis();

		System.out.println(end - start);
	}

	/** Builds the path of the child file for one bucket using the platform separator. */
	private static String childPath(String dir, int remainder) {
		return new File(dir, "child_" + remainder + ".txt").getPath();
	}

	/**
	 * Appends all records to the given file and then empties the list.
	 *
	 * @param filePath file to append to; it is created if it does not exist
	 * @param data     records to write in order; cleared after a successful write
	 * @throws Exception if the file cannot be opened or written
	 */
	public static void writeChildFiles(String filePath, List<String> data) throws Exception {
		try (OutputStream out = new FileOutputStream(new File(filePath), true);
				BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(out)) {
			for (String record : data) {
				// Explicit charset so the bytes do not depend on the platform default.
				bufferedOutputStream.write(record.getBytes(java.nio.charset.StandardCharsets.UTF_8));
			}
		}
		data.clear();
	}
}

获取出现次数最多的数

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Finds the number that occurs most often across all child files.
 *
 * <p>Every file in the working directory whose name starts with
 * {@code "child"} is scanned; for each one the most frequent number and its
 * count are determined, and the overall winner is printed.
 */
public class getChildFileMaxNum {

	/** Directory scanned when none is given on the command line. */
	private static final String DEFAULT_DIR = "D:\\testFile";

	/** Key reported when no number was found at all. */
	private static final int NO_KEY = -999;

	/**
	 * Scans the child files and prints {@code max:<count>,key:<number>}
	 * followed by the elapsed time in milliseconds.
	 *
	 * @param args optional override: {@code args[0]} is the directory that
	 *             contains the child files
	 * @throws Exception if a child file cannot be read or contains a token
	 *                   that is not a valid integer
	 */
	public static void main(String[] args) throws Exception {
		long start = System.currentTimeMillis();
		File file = new File((args != null && args.length > 0) ? args[0] : DEFAULT_DIR);
		Map<Integer, Integer> map = new HashMap<Integer, Integer>();

		// File.list() returns null when the directory cannot be listed.
		String[] names = file.isDirectory() ? file.list() : null;
		if (names != null) {
			for (String name : names) {
				if (!name.startsWith("child")) {
					continue;
				}
				Map<Integer, Integer> childMap = readChildFileAndWriteMax(new File(file, name).getPath());
				for (Map.Entry<Integer, Integer> entry : childMap.entrySet()) {
					// Sum the counts if the same number is reported by more than one file.
					map.merge(entry.getKey(), entry.getValue(), Integer::sum);
				}
			}
		}

		int max = 0;
		int key = NO_KEY;
		for (Map.Entry<Integer, Integer> entry : map.entrySet()) {
			if (entry.getValue() > max) {
				max = entry.getValue();
				key = entry.getKey();
			}
		}
		System.out.println("max:"+max+",key:"+key);

		long end = System.currentTimeMillis();
		System.out.println(end-start);
	}

	/**
	 * Counts the numbers in one child file and returns the most frequent one.
	 *
	 * <p>The file is parsed character by character; each comma terminates one
	 * number. Characters after the last comma are ignored.
	 *
	 * @param filePath path of the child file to read
	 * @return a map with a single entry (number to occurrence count) for the
	 *         most frequent number, or an empty map if the file holds no numbers
	 * @throws Exception if the file cannot be read or a token is not a valid integer
	 */
	private static Map<Integer, Integer> readChildFileAndWriteMax(String filePath) throws Exception {
		Map<Integer, Integer> counts = new HashMap<Integer, Integer>();

		// try-with-resources closes the reader even when parsing fails.
		try (BufferedReader bufferedReader = new BufferedReader(new FileReader(new File(filePath)))) {
			StringBuilder token = new StringBuilder();
			int c;
			while ((c = bufferedReader.read()) != -1) {
				if (c != ',') {
					token.append((char) c);
					continue;
				}
				counts.merge(Integer.parseInt(token.toString()), 1, Integer::sum);
				token.setLength(0);
			}
		}

		int max = 0;
		int key = NO_KEY;
		for (Map.Entry<Integer, Integer> entry : counts.entrySet()) {
			if (entry.getValue() > max) {
				max = entry.getValue();
				key = entry.getKey();
			}
		}

		Map<Integer, Integer> result = new HashMap<Integer, Integer>();
		if (max > 0) {
			result.put(key, max);
		}
		// The child file could be deleted at this point.
		return result;
	}
}

测试结果:
分割文件大概花了10分钟
从1000个文件中找出出现次数最多的数大概花费1分钟

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值