上一篇用了MappedByteBuffer去读文件,倘若只用一般的NIO(如FileChannel、ByteBuffer)呢?测试后发现效果其实差不多。代码如下:
package com.io.nio;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts word occurrences in a text file read through plain NIO
 * ({@link FileChannel} + {@link ByteBuffer}) and prints the most frequent words.
 *
 * <p>The file is read in fixed-size chunks; chunks are split into lines on LF,
 * each line is decoded and scanned for words matching {@code [A-Z]?[a-z]+}.
 * All state is static and unsynchronized, so the class is not thread-safe.
 */
public class WordsCountNormalNIO {
    /** Word -> number of occurrences, filled by {@link #deal(String)}. */
    public static Map<String, Integer> map = new HashMap<String, Integer>();
    /** Size in bytes of the read buffer allocated by {@link #main(String[])}. */
    static final int BUFFER_SIZE = 1024 * 10;
    /** Bytes of the current line whose terminating LF has not been read yet. */
    static byte[] left = new byte[0];
    static final int LF = 10;// line feed, ASCII
    static final int CR = 13;// carriage return, ASCII

    /** Compiled once; a word is an optional capital followed by lowercase letters. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[A-Z]?[a-z]+");
    /** Rank used by {@link #showTop()} to pick the reporting threshold. */
    private static final int TOP_COUNT = 10;

    /**
     * Reads the hard-coded log file, prints the elapsed time in milliseconds
     * and then the most frequent words.
     */
    public static void main(String args[]) throws Exception {
        File fin = new File("E:\\Alllog.log");
        long start = System.currentTimeMillis();
        RandomAccessFile raf = new RandomAccessFile(fin, "r");
        try {
            FileChannel fcin = raf.getChannel();
            ByteBuffer buf = ByteBuffer.allocate(BUFFER_SIZE);
            readFileByLine(fcin, buf);
        } finally {
            // Closing the file also closes the channel obtained from it.
            raf.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("spend time :" + (end - start));
        //show();
        showTop();
    }

    /**
     * Reads {@code fcin} to the end, splitting the content into lines on LF and
     * passing every decoded line to {@link #deal(String)}.
     *
     * <p>A line may span several reads; the unfinished part is carried in
     * {@link #left}. A final line without a trailing LF is processed too. A CR
     * immediately before the LF (Windows line ending) is dropped from the line.
     * {@link #left} is always reset on exit so that a later call does not see
     * bytes from this one.
     *
     * <p>I/O and decoding errors are printed to stderr and end the read early;
     * words counted so far stay in {@link #map}.
     *
     * @param fcin channel positioned where reading should start
     * @param buf  scratch buffer used for every read; it is cleared after each chunk
     */
    public static void readFileByLine(FileChannel fcin, ByteBuffer buf) {
        String encode = "GBK";// "UTF-8"
        if (left == null) {
            left = new byte[0];
        }
        try {
            while (fcin.read(buf) != -1) {
                // Copy out what was just read and make the buffer writable again.
                buf.flip();
                byte[] chunk = new byte[buf.remaining()];
                buf.get(chunk);
                buf.clear();

                int lineStart = 0;
                for (int i = 0; i < chunk.length; i++) {
                    if (chunk[i] != LF) {
                        continue;
                    }
                    // Complete line = carried-over bytes + this chunk up to (not including) LF.
                    byte[] lineBytes = concat(left, chunk, lineStart, i - lineStart);
                    left = new byte[0];
                    deal(decodeLine(lineBytes, encode));
                    lineStart = i + 1;
                }
                // Whatever follows the last LF (or the whole chunk if it had none)
                // belongs to a line that is not finished yet.
                left = concat(left, chunk, lineStart, chunk.length - lineStart);
            }
            if (left.length > 0) {// last line of the file had no line terminator
                deal(decodeLine(left, encode));
            }
        } catch (IOException e) {
            // Best effort: report and keep the counts gathered so far.
            e.printStackTrace();
        } finally {
            left = new byte[0];
        }
    }

    /** Returns {@code head} followed by {@code count} bytes of {@code src} starting at {@code from}. */
    private static byte[] concat(byte[] head, byte[] src, int from, int count) {
        byte[] joined = new byte[head.length + count];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(src, from, joined, head.length, count);
        return joined;
    }

    /** Decodes one line, dropping a single trailing CR if present. */
    private static String decodeLine(byte[] bytes, String encode) throws IOException {
        int length = bytes.length;
        if (length > 0 && bytes[length - 1] == CR) {
            length--;
        }
        return new String(bytes, 0, length, encode);
    }

    /**
     * Finds every word in {@code line} and increments its counter in {@link #map}.
     *
     * @param line one line of text
     */
    public static void deal(String line) {
        Matcher matcher = WORD_PATTERN.matcher(line);
        while (matcher.find()) {
            String word = matcher.group();
            Integer seen = map.get(word);
            map.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /** Prints every counted word with its count, in map iteration order. */
    public static void show() {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    /**
     * Prints every word whose count is at least the 10th largest count.
     * Ties at the threshold are all printed, so more than 10 lines may appear;
     * with fewer than 10 distinct words every word is printed.
     */
    @SuppressWarnings("unchecked")
    public static void showTop() {
        // Generic arrays cannot be created directly; the raw array only ever
        // holds entries taken from the typed map, so the cast is safe.
        Map.Entry<String, Integer>[] array = new Map.Entry[map.size()];
        int i = 0;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            array[i++] = entry;
        }
        int threshold = findTop(array, 0, array.length - 1, TOP_COUNT);
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            if (entry.getValue() >= threshold) {
                System.out.println(entry.getKey()+" : "+entry.getValue());
            }
        }
    }

    /**
     * Returns the k-th largest value among {@code p[L..R]} (quickselect).
     * The elements of {@code p[L..R]} are reordered as a side effect.
     *
     * @param p entries to search
     * @param L first index of the range (inclusive)
     * @param R last index of the range (inclusive)
     * @param k 1-based rank, 1 being the largest value
     * @return the k-th largest value, or -1 if the range is empty, {@code k < 1}
     *         or {@code k} exceeds the number of elements in the range
     */
    public static int findTop(Map.Entry<String, Integer>[] p, int L, int R, int k) {
        if (p == null || L > R || k < 1 || k > R - L + 1) {
            return -1;
        }
        int lo = L;
        int hi = R;
        int rank = k;
        // Invariant: the answer is the rank-th largest of p[lo..hi], 1 <= rank <= hi-lo+1.
        // A loop instead of recursion keeps stack use constant on unlucky pivots.
        while (lo < hi) {
            int pivotIndex = quickSort(p, lo, hi);
            int pivotRank = pivotIndex - lo + 1;// rank of the pivot within p[lo..hi]
            if (rank == pivotRank) {
                return p[pivotIndex].getValue();
            }
            if (rank < pivotRank) {
                hi = pivotIndex - 1;// answer is among the larger values left of the pivot
            } else {
                lo = pivotIndex + 1;// skip the pivot and everything larger than it
                rank -= pivotRank;
            }
        }
        return p[lo].getValue();
    }

    /**
     * One quicksort partition step in descending order: uses {@code p[L]} as the
     * pivot and rearranges {@code p[L..R]} so that values greater than or equal
     * to the pivot end up on its left and values less than or equal on its right.
     *
     * @return the final index of the pivot, or -1 if {@code L >= R}
     */
    public static int quickSort(Map.Entry<String, Integer>[] p, int L, int R) {
        if (L >= R) {
            return -1;
        }
        int i = L;
        int j = R;
        Map.Entry<String, Integer> pivot = p[L];
        int pivotValue = pivot.getValue();
        while (i < j) {
            // From the right: skip values already smaller than the pivot.
            while (i < j && p[j].getValue() < pivotValue) {
                j--;
            }
            if (i < j) {
                p[i] = p[j];
                i++;
            }
            // From the left: skip values already larger than the pivot.
            while (i < j && p[i].getValue() > pivotValue) {
                i++;
            }
            if (i < j) {
                p[j] = p[i];
                j--;
            }
        }
        p[i] = pivot;
        return i;
    }
}
结果与上篇一致,不过这里换了TOP K算法,用的是基于快排划分(partition)的快速选择,只需找出第K大的值,而不必对整个数组排序。
jvisualvm运行如下: