海量IP地址排序统计出现次数最多的K个地址

由于海量IP地址无法一次性装入内存进行排序,本文采用如下步骤:(1)对每个IP取hash,按hash值将其分散到N(这里取1000)个小文件中,保证相同的IP一定落在同一个文件里;(2)分别统计每个小文件中出现次数最多的K个地址;(3)对得到的至多N*K个候选地址再做一次统计排序(可用最小堆/归并/快排),取出全局出现次数最多的K个地址。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;

/**
 * Finds the most frequent IP addresses in a file that is too large to count in memory.
 *
 * <p>The pipeline has three stages:
 * <ol>
 *   <li>{@link #generateIp(int, String)} writes a space-separated file of random addresses;</li>
 *   <li>{@link #hash(String, int)} distributes the addresses over {@code fileNum} bucket files
 *       in a {@code hash} directory next to the source file, so that equal addresses always
 *       end up in the same bucket;</li>
 *   <li>{@link #hashAndSort(String, int, int)} counts each bucket separately, keeps the
 *       {@code top} most frequent addresses of every bucket, and then selects the overall
 *       {@code top} from those candidates.</li>
 * </ol>
 *
 * <p>All files are read and written with the platform default charset.
 */
public class IP {

	/** Character written after every address in the source file and in the bucket files. */
	private static final char SEPARATOR = ' ';

	/**
	 * Writes {@code num} random addresses of the form {@code 192.a.b.c} to {@code path},
	 * each followed by a single space. An existing file is overwritten; missing parent
	 * directories are created.
	 *
	 * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown.
	 *
	 * @param num  number of addresses to write
	 * @param path file to create or overwrite
	 */
	public static void generateIp(int num,String path) {
		Random random = new Random();
		File file = new File(path);
		File parent = file.getParentFile();
		// A bare file name has no parent; only create directories when there is one.
		if (parent != null) {
			parent.mkdirs();
		}
		try (BufferedWriter bufferedWriter =
				new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))) {
			StringBuilder sBuilder = new StringBuilder(16);
			for (int i = 0; i < num; i++) {
				sBuilder.setLength(0);
				sBuilder.append("192.")
						.append(random.nextInt(256)).append('.')
						.append(random.nextInt(256)).append('.')
						.append(random.nextInt(256)).append(SEPARATOR);
				bufferedWriter.append(sBuilder);
			}
			bufferedWriter.flush();
			System.out.println("ip生成完毕");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Splits the space-separated addresses in {@code path} into {@code fileNum} bucket files
	 * named {@code hash/0.txt} ... {@code hash/(fileNum-1).txt}, located next to the source
	 * file. The bucket is chosen from the address's hash code, so equal addresses always go
	 * to the same bucket.
	 *
	 * <p>Bucket files are truncated first, so running the method again does not accumulate
	 * data from an earlier run. Empty tokens (for example the one after the trailing
	 * separator) are skipped.
	 *
	 * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown; all
	 * bucket writers are closed in every case.
	 *
	 * @param path    source file produced by {@link #generateIp(int, String)}
	 * @param fileNum number of bucket files; must be positive
	 * @throws IllegalArgumentException if {@code fileNum} is not positive
	 */
	public static void hash(String path,int fileNum) {
		if (fileNum <= 0) {
			throw new IllegalArgumentException("fileNum must be positive: " + fileNum);
		}
		File file = new File(path);
		OutputStreamWriter[] osArr = new OutputStreamWriter[fileNum];
		try {
			for (int i = 0; i < fileNum; i++) {
				File bucket = bucketFile(file, i);
				bucket.getParentFile().mkdirs();
				// No append flag: stale data from a previous run must not be counted again.
				osArr[i] = new OutputStreamWriter(new FileOutputStream(bucket));
			}
			try (BufferedReader bufferedReader =
					new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
				String token;
				while ((token = readToken(bufferedReader)) != null) {
					if (token.isEmpty()) {
						continue;
					}
					osArr[bucketIndex(token, fileNum)].write(token + SEPARATOR);
				}
			}
			// Flush before announcing success so that write errors surface first.
			for (OutputStreamWriter outputStreamWriter : osArr) {
				outputStreamWriter.flush();
			}
			System.out.println("hash完毕");
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			for (OutputStreamWriter outputStreamWriter : osArr) {
				if (outputStreamWriter != null) {
					try {
						outputStreamWriter.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
		}
	}

	/**
	 * Counts the addresses of every bucket file written by {@link #hash(String, int)},
	 * keeps the {@code top} most frequent addresses per bucket, selects the overall
	 * {@code top} from these candidates, and writes them to {@code result.txt} next to the
	 * source file (one {@code "<address> 出现次数:<count>"} line per address, highest count
	 * first). Each result entry is also printed to standard output.
	 *
	 * <p>Because every address lives in exactly one bucket, its per-bucket count is its
	 * total count, so merging the per-bucket winners is sufficient.
	 *
	 * <p>I/O failures are reported with {@code printStackTrace()} and not rethrown.
	 *
	 * @param path    the original source file; only its directory is used
	 * @param fileNum number of bucket files to read
	 * @param top     number of addresses to report
	 */
	public static void hashAndSort(String path,int fileNum,int top) {
		File file = new File(path);
		try {
			Map<String, Integer> topMap = new HashMap<>();
			for (int i = 0; i < fileNum; i++) {
				Map<String, Integer> map = new HashMap<>(1024);
				try (BufferedReader bufferedReader = new BufferedReader(
						new InputStreamReader(new FileInputStream(bucketFile(file, i))))) {
					String token;
					while ((token = readToken(bufferedReader)) != null) {
						if (token.isEmpty()) {
							continue;
						}
						Integer count = map.get(token);
						map.put(token, count == null ? 1 : count + 1);
					}
				}
				sortMap(map, top, topMap);
			}
			// Insertion-ordered, so the ranking produced by sortMap survives until output.
			Map<String, Integer> resultMap = new LinkedHashMap<>();
			sortMap(topMap, top, resultMap);
			File result = new File(file.getParentFile(), "result.txt");
			try (BufferedWriter writer =
					new BufferedWriter(new OutputStreamWriter(new FileOutputStream(result)))) {
				for (Entry<String,Integer> entry : resultMap.entrySet()) {
					writer.write(entry.getKey() + " 出现次数:" + entry.getValue());
					writer.write("\r\n");
					System.out.println(entry.toString());
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Copies the {@code top} entries with the highest values from {@code map} into
	 * {@code resultMap}, inserting them in descending order of value. Entries with equal
	 * values are ordered by key so that the selection is reproducible. Existing entries of
	 * {@code resultMap} are kept unless a copied key replaces them.
	 *
	 * @param map       counts to rank; not modified
	 * @param top       maximum number of entries to copy; nothing is copied if not positive
	 * @param resultMap destination map; use an insertion-ordered map to retain the ranking
	 */
	public static void sortMap(Map<String, Integer> map,int top,Map<String, Integer> resultMap) {
		List<Entry<String, Integer>> list = new ArrayList<>(map.entrySet());
		Collections.sort(list, new Comparator<Entry<String, Integer>>() {
			@Override
			public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
				int byCount = Integer.compare(o2.getValue(), o1.getValue());
				return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
			}
		});
		int limit = Math.min(top, list.size());
		for (int j = 0; j < limit; j++) {
			Entry<String, Integer> entry = list.get(j);
			resultMap.put(entry.getKey(), entry.getValue());
		}
	}

	/**
	 * Runs the whole pipeline with hard-coded settings: one billion addresses, 1000 bucket
	 * files and a top-10 report.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String path = "F:/ip/source2.txt";
		int top = 10;
		int filenum = 1000;
		generateIp(1_000_000_000, path);
		hash(path,filenum);
		hashAndSort(path, filenum,top);
	}

	/**
	 * Reads characters up to the next separator or end of input.
	 *
	 * @return the token (empty if two separators are adjacent), or {@code null} when the end
	 *         of input is reached without any pending characters
	 */
	private static String readToken(BufferedReader reader) throws IOException {
		StringBuilder token = new StringBuilder();
		int c;
		while ((c = reader.read()) != -1) {
			if (c == SEPARATOR) {
				return token.toString();
			}
			token.append((char) c);
		}
		return token.length() > 0 ? token.toString() : null;
	}

	/** Returns the bucket file with the given index inside the "hash" directory next to {@code source}. */
	private static File bucketFile(File source, int index) {
		return new File(new File(source.getParentFile(), "hash"), index + ".txt");
	}

	/** Maps a token to a bucket in {@code [0, fileNum)}; the sign bit is masked off so the index is never negative. */
	private static int bucketIndex(String token, int fileNum) {
		int h = token.hashCode();
		return ((h ^ (h >>> 16)) & Integer.MAX_VALUE) % fileNum;
	}
}


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值