Java实现WordCount统计本地文件(单线程和多线程)
单线程
/**
 * Single-threaded word count over the local file {@code word.txt}.
 *
 * <p>Reads the file line by line, splits every line on runs of whitespace and
 * prints the number of lines read, each distinct word with its frequency
 * (in the natural String order of the backing {@link TreeMap}) and the
 * elapsed wall-clock time in milliseconds.
 */
public class WordCountTest {

    /**
     * Program entry point.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        Map<String, Integer> wordMap = new TreeMap<>();

        // try-with-resources closes the reader (and the FileReader it wraps)
        // on both the normal and the exceptional path.
        try (BufferedReader br = new BufferedReader(new FileReader("word.txt"))) {
            String line;
            int num = 0;
            while ((line = br.readLine()) != null) {
                num++;
                for (String word : line.trim().split("\\s+")) {
                    // A blank line splits into a single empty token; it is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer count = wordMap.get(word);
                    wordMap.put(word, count == null ? 1 : count + 1);
                }
            }

            System.out.println("word.txt 总行数:" + num);
            for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
                System.out.println("单词:" + entry.getKey() + " : " + entry.getValue());
            }

            long end = System.currentTimeMillis();
            System.out.println("程序总用时:" + (end - start) + "ms");
        } catch (java.io.IOException e) {
            // Best-effort demo program: report the failure and fall through.
            e.printStackTrace();
        }
    }
}
多线程
/**
 * Counts word occurrences in one chunk of text.
 *
 * <p>Implements {@link Runnable} so a chunk can be handed to a worker thread.
 * The result is accumulated in {@link #wordMap}, which is a plain
 * {@link TreeMap} with no synchronization of its own.
 */
public class WordCount implements Runnable {
    /** Text chunk to analyse; words are separated by runs of whitespace. */
    String content = "";

    /** Word to number of occurrences in {@link #content}; filled in by {@link #run()}. */
    Map<String, Integer> wordMap = new TreeMap<>();

    /**
     * @param content the text whose words should be counted
     */
    public WordCount(String content) {
        this.content = content;
    }

    /**
     * Splits the content on whitespace and adds every word to {@link #wordMap}.
     * A null, empty or whitespace-only content contributes nothing.
     */
    @Override
    public void run() {
        if (content == null) {
            return;
        }
        for (String word : content.trim().split("\\s+")) {
            // Splitting an empty string yields one empty token; it is not a word.
            if (word.isEmpty()) {
                continue;
            }
            Integer count = wordMap.get(word);
            wordMap.put(word, count == null ? 1 : count + 1);
        }
    }
}
public class WordCountTest2 {
static ExecutorService executorService = Executors.newCachedThreadPool();
static TreeMap<String, Map<String, Integer>> threadMap = new TreeMap<>();
public static void threadSet(String content,Integer count) {
WordCount wordCount = new WordCount(content);
Thread thread = new Thread(wordCount);
executorService.execute(thread);
threadMap.put("thread- " + count, wordCount.wordMap);
}
public static void main(String[] args) {
long start = System.currentTimeMillis();
FileReader fr=null;
BufferedReader br=null;
try {
fr = new FileReader("word.txt");
br = new BufferedReader(fr);
String tmpStr="";
StringBuffer lineStringBuf=new StringBuffer();
int num=0;
while ((tmpStr=br.readLine())!=null){
num++;
lineStringBuf.append(tmpStr.trim()+" ");
if (num%300000==0){
threadSet(lineStringBuf.toString(),num/300000);
lineStringBuf.setLength(0);
}
}
if (lineStringBuf.length()>0){
threadSet(lineStringBuf.toString(),0);
lineStringBuf.setLength(0);
}
System.out.println("word.txt 总行数:"+num);
executorService.shutdown();
while (true){
if(executorService.isTerminated()){
HashMap<String, Integer> wordMap = new HashMap<>();
for (Map<String, Integer> map : threadMap.values()) {
Iterator<String> iterator = wordMap.keySet().iterator();
while (iterator.hasNext()){
String word = iterator.next();
if (wordMap.containsKey(word)){
wordMap.put(word,wordMap.get(word)+map.get(word));
}else {
wordMap.put(word,map.get(word));
}
}
}
for (String s : wordMap.keySet()) {
System.out.println("单词:"+s +":"+wordMap.get(s));
}
System.out.println("将信息写入指定文件中");
BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("wordcount.txt"));
for (String word : wordMap.keySet()) {
String lingStr = word +":"+wordMap.get(word);
bufferedWriter.write(lingStr);
bufferedWriter.newLine();
bufferedWriter.flush();
break;
}
}
long end = System.currentTimeMillis();
System.out.println("程序总用时:"+(end-start)+"ms");
} catch (java.io.IOException e) {
e.printStackTrace();
}
}
}