package com.xasmall.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Set;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
/**
 * Counts how often each English word occurs in a text file.
 *
 * <p>Each line is cleaned with regular expressions (everything except ASCII letters and the
 * separators is dropped), split into words, and the words are tallied in a Guava
 * {@link Multiset}.
 */
public class MapDemo {

    /**
     * Reads the given text file, counts every word made of ASCII letters, and prints one
     * {@code word-->count} line per distinct word to standard output.
     *
     * <p>Whitespace, commas and periods separate words; every other non-letter character is
     * removed from the word it appears in (so {@code don't} is counted as {@code dont}).
     * Counting is case-sensitive. The lines are printed in the iteration order of the
     * multiset's element set.
     *
     * @param filename path of the text file to analyse
     * @throws IOException if the file cannot be opened or read
     */
    public static void dealstring(String filename) throws IOException {
        Multiset<String> counts = HashMultiset.create();

        // try-with-resources closes the readers even when reading fails part-way through.
        try (FileReader fileReader = new FileReader(new File(filename));
                BufferedReader reader = new BufferedReader(fileReader)) {
            String line;
            // Handling each line on its own keeps the last word of one line from being glued
            // to the first word of the next (readLine() strips the line terminator).
            while ((line = reader.readLine()) != null) {
                // Drop everything that is neither a letter nor a separator. Commas and periods
                // are kept at this stage so that they can still split words below.
                String cleaned = line.replaceAll("[^a-zA-Z\\s,.]", "");
                // Split on runs of separators so repeated blanks do not produce empty words.
                for (String word : cleaned.split("[\\s,.]+")) {
                    // split() still yields "" for a blank line or a leading separator.
                    if (!word.isEmpty()) {
                        counts.add(word);
                    }
                }
            }
        }

        Set<String> distinctWords = counts.elementSet();
        for (String word : distinctWords) {
            System.out.println(word + "-->" + counts.count(word));
        }
    }
}
// Note: this class depends on Google Guava; download Guava and add it to the classpath.