由于海量IP地址无法一次性装入内存进行排序,本文采用如下步骤:(1)对每个IP做hash后,将其分割到N(这里取1000)个文件中;(2)分别统计每个文件中出现次数最多的K个地址;(3)对得到的K*N个候选地址再做一次统计排序(最小堆/归并/快排),得到最终的Top K。
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
/**
 * Finds the most frequent IP addresses in an input file that is too large to count in memory.
 *
 * <p>The pipeline has three steps:
 * <ol>
 *   <li>{@link #generateIp(int, String)} writes a whitespace-separated sample input file;</li>
 *   <li>{@link #hash(String, int)} partitions the addresses into bucket files
 *       ({@code hash/0.txt}, {@code hash/1.txt}, ...) next to the input file;</li>
 *   <li>{@link #hashAndSort(String, int, int)} counts every bucket on its own, keeps each
 *       bucket's most frequent addresses, and ranks those candidates into {@code result.txt}.</li>
 * </ol>
 *
 * <p>Equal addresses always hash to the same bucket, so the count computed inside one bucket
 * is the address's total count.
 */
public class IP {

    /** First octet shared by every generated address. */
    private static final String IP_PREFIX = "192.";

    /** Name of the directory, next to the input file, that holds the bucket files. */
    private static final String HASH_DIR = "hash";

    /**
     * Writes {@code num} random addresses of the form {@code 192.a.b.c}, each followed by a
     * single space, to {@code path}. Missing parent directories are created and an existing
     * file is overwritten. I/O failures are printed to standard error; the completion message
     * is printed only on success.
     *
     * @param num  number of addresses to write
     * @param path location of the file to create
     */
    public static void generateIp(int num, String path) {
        Random random = new Random();
        // Resolve to an absolute file so a bare file name still has a parent directory.
        File file = new File(path).getAbsoluteFile();
        File parent = file.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            for (int i = 0; i < num; i++) {
                out.write(IP_PREFIX);
                out.write(Integer.toString(random.nextInt(256)));
                out.write('.');
                out.write(Integer.toString(random.nextInt(256)));
                out.write('.');
                out.write(Integer.toString(random.nextInt(256)));
                out.write(' ');
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        System.out.println("ip生成完毕");
    }

    /**
     * Partitions the addresses in {@code path} into {@code fileNum} bucket files so that all
     * occurrences of one address end up in the same bucket. Bucket files are recreated on every
     * call, so counts from an earlier run can never leak into the current one. I/O failures are
     * printed to standard error.
     *
     * @param path    input file containing whitespace-separated addresses
     * @param fileNum number of bucket files to create; must be positive
     * @throws IllegalArgumentException if {@code fileNum} is not positive
     */
    public static void hash(String path, int fileNum) {
        if (fileNum <= 0) {
            throw new IllegalArgumentException("fileNum must be positive: " + fileNum);
        }
        File source = new File(path);
        Writer[] buckets = new Writer[fileNum];
        try {
            new File(parentDir(source), HASH_DIR).mkdirs();
            for (int i = 0; i < fileNum; i++) {
                // Truncate instead of append: appending would mix in data from previous runs.
                buckets[i] = new OutputStreamWriter(
                        new FileOutputStream(bucketFile(source, i)), StandardCharsets.UTF_8);
            }
            try (BufferedReader reader = openReader(source)) {
                String token;
                while ((token = readToken(reader)) != null) {
                    Writer bucket = buckets[bucketIndex(token, fileNum)];
                    bucket.write(token);
                    bucket.write(' ');
                }
            }
            // Close explicitly on the success path so that flush errors are reported.
            for (Writer bucket : buckets) {
                bucket.close();
            }
            System.out.println("hash完毕");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closing an already closed writer is a no-op; this only matters after a failure.
            for (Writer bucket : buckets) {
                closeQuietly(bucket);
            }
        }
    }

    /**
     * Counts the addresses of every bucket file produced by {@link #hash(String, int)}, keeps
     * the {@code top} most frequent addresses of each bucket as candidates, ranks the
     * candidates, and writes the overall {@code top} addresses to {@code result.txt} next to
     * the input file, most frequent first. The same ranking is printed to standard output.
     * I/O failures are printed to standard error.
     *
     * @param path    the input file that was partitioned; only its directory is used
     * @param fileNum number of bucket files to read
     * @param top     number of addresses to report
     */
    public static void hashAndSort(String path, int fileNum, int top) {
        File source = new File(path);
        try {
            Map<String, Integer> candidates = new HashMap<>();
            for (int i = 0; i < fileNum; i++) {
                sortMap(countTokens(bucketFile(source, i)), top, candidates);
            }
            // Insertion-ordered, so the ranking produced by sortMap survives until output.
            Map<String, Integer> resultMap = new LinkedHashMap<>();
            sortMap(candidates, top, resultMap);

            File result = new File(parentDir(source), "result" + ".txt");
            try (Writer out = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(result), StandardCharsets.UTF_8))) {
                for (Entry<String, Integer> entry : resultMap.entrySet()) {
                    out.write(entry.getKey() + " 出现次数:" + entry.getValue());
                    out.write("\r\n");
                }
            }
            for (Entry<String, Integer> entry : resultMap.entrySet()) {
                System.out.println(entry.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the {@code top} entries of {@code map} with the highest values into
     * {@code resultMap}, inserting them in descending value order. Entries with equal values
     * are ordered by key so that the outcome is deterministic. Pass an insertion-ordered map
     * (for example a {@code LinkedHashMap}) as {@code resultMap} to retain the ranking.
     *
     * @param map       counts to rank; not modified
     * @param top       maximum number of entries to copy
     * @param resultMap destination that receives the selected entries
     */
    public static void sortMap(Map<String, Integer> map, int top, Map<String, Integer> resultMap) {
        List<Entry<String, Integer>> ranked = new ArrayList<>(map.entrySet());
        Collections.sort(ranked, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                int byCount = Integer.compare(o2.getValue(), o1.getValue());
                return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
            }
        });
        int limit = Math.min(top, ranked.size());
        for (int j = 0; j < limit; j++) {
            Entry<String, Integer> entry = ranked.get(j);
            resultMap.put(entry.getKey(), entry.getValue());
        }
    }

    public static void main(String[] args) {
        String path = "F:/ip/source2.txt";
        int top = 10;
        int filenum = 1000;
        generateIp((int) Math.pow(10, 9), path);
        hash(path, filenum);
        hashAndSort(path, filenum, top);
    }

    /** Returns the directory containing {@code source}, resolved against the working directory. */
    private static File parentDir(File source) {
        return source.getAbsoluteFile().getParentFile();
    }

    /** Returns the bucket file with the given index that belongs to {@code source}. */
    private static File bucketFile(File source, int index) {
        return new File(new File(parentDir(source), HASH_DIR), index + ".txt");
    }

    /** Opens {@code file} for buffered UTF-8 reading. */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
    }

    /**
     * Reads the next non-empty whitespace-delimited token, or returns {@code null} once the
     * input is exhausted. Leading, trailing and repeated separators never produce empty tokens.
     */
    private static String readToken(Reader reader) throws IOException {
        StringBuilder token = new StringBuilder();
        int c;
        while ((c = reader.read()) != -1) {
            if (Character.isWhitespace(c)) {
                if (token.length() > 0) {
                    return token.toString();
                }
            } else {
                token.append((char) c);
            }
        }
        return token.length() > 0 ? token.toString() : null;
    }

    /** Maps a token to a bucket index in {@code [0, fileNum)}. */
    private static int bucketIndex(String token, int fileNum) {
        int h = token.hashCode();
        int spread = h ^ (h >>> 16);
        // Clear the sign bit rather than calling Math.abs, which stays negative for MIN_VALUE.
        return (spread & Integer.MAX_VALUE) % fileNum;
    }

    /** Counts how often each token occurs in {@code bucket}. */
    private static Map<String, Integer> countTokens(File bucket) throws IOException {
        Map<String, Integer> counts = new HashMap<>(1024);
        try (BufferedReader reader = openReader(bucket)) {
            String token;
            while ((token = readToken(reader)) != null) {
                Integer seen = counts.get(token);
                counts.put(token, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /** Closes {@code writer} if it is non-null, suppressing any failure. */
    private static void closeQuietly(Writer writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException ignored) {
            // Only reached during cleanup after an earlier failure that was already reported.
        }
    }
}