import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.regex.Pattern;
/**
 * Parallel word count over a text file.
 *
 * <p>The non-blank lines of the input are dealt round-robin into {@link #NUM}
 * partitions, each partition is counted by its own worker thread, and the
 * per-partition counts are merged on the main thread and printed as
 * {@code word<TAB>count} lines.
 */
public class Test {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "data/Broken to Harness.txt";

    /** Number of partitions the file is split into; also the number of worker threads. */
    private static final int NUM = 5;

    /** Punctuation characters stripped from every line before it is tokenised. */
    private static final Pattern PUNCTUATION = Pattern.compile("[,.?\"!;:]");

    /** Separator between words: one or more whitespace characters. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Counts the words of a text file in parallel and prints the totals.
     *
     * <p>While counting, every worker prints {@code threadName<TAB>word} for each
     * word it sees, followed by the remaining latch count when it finishes.
     *
     * @param args optional; {@code args[0]} overrides the input path
     *             (defaults to {@code data/Broken to Harness.txt})
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        List<List<String>> article = readPartitions(path, NUM);

        // Parallel phase: one task per partition, each yielding word -> count.
        List<Future<Map<String, Integer>>> array = new ArrayList<>();
        ExecutorService pool = Executors.newFixedThreadPool(NUM);
        CountDownLatch latch = new CountDownLatch(NUM);
        try {
            for (List<String> list : article) {
                Future<Map<String, Integer>> submit = pool.submit(() -> {
                    try {
                        return countWords(list);
                    } finally {
                        // Count down even when counting fails; otherwise the
                        // main thread would wait on the latch forever.
                        latch.countDown();
                        System.out.println(latch.getCount());
                    }
                });
                array.add(submit);
            }

            // Block until every worker has counted down.
            latch.await();

            // Merge phase (main thread only, so a plain HashMap is sufficient).
            Map<String, Integer> finalMap = new HashMap<>();
            for (Future<Map<String, Integer>> mapFuture : array) {
                try {
                    mapFuture.get().forEach((word, n) -> finalMap.merge(word, n, Integer::sum));
                } catch (ExecutionException e) {
                    // Best effort: report the failed partition and keep merging the rest.
                    e.printStackTrace();
                }
            }

            // Present the final result.
            finalMap.forEach((k, v) -> System.out.println(k + "\t" + v));
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            // Always release the worker threads; they are non-daemon and would
            // otherwise keep the JVM alive.
            pool.shutdown();
        }
    }

    /**
     * Reads the file and deals its non-blank lines round-robin into {@code parts} lists.
     *
     * <p>Each line is trimmed, skipped if empty, and then stripped of punctuation.
     * A line that becomes empty only after punctuation removal still occupies a
     * slot, so the distribution of lines over partitions is unaffected by it.
     * The file is decoded by {@link FileReader} with the default charset.
     *
     * @param path  file to read
     * @param parts number of partitions to create
     * @return {@code parts} lists of cleaned lines
     * @throws IOException if the file cannot be opened or read
     */
    private static List<List<String>> readPartitions(String path, int parts) throws IOException {
        List<List<String>> partitions = new ArrayList<>(parts);
        for (int i = 0; i < parts; i++) {
            partitions.add(new ArrayList<>());
        }
        // try-with-resources closes the reader even if readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue; // drop blank lines
                }
                line = PUNCTUATION.matcher(line).replaceAll("");
                partitions.get((count++) % parts).add(line);
            }
        }
        return partitions;
    }

    /**
     * Counts the words in one partition, printing {@code threadName<TAB>word} per word.
     *
     * @param rows cleaned lines of a single partition
     * @return word -> number of occurrences within {@code rows}
     */
    private static Map<String, Integer> countWords(List<String> rows) {
        String name = Thread.currentThread().getName();
        Map<String, Integer> map = new HashMap<>();
        for (String row : rows) {
            for (String word : WHITESPACE.split(row)) {
                // Punctuation removal can leave an empty row or leading
                // whitespace, both of which produce an empty token; it is not a word.
                if (word.isEmpty()) {
                    continue;
                }
                map.merge(word, 1, Integer::sum);
                System.out.println(name + "\t" + word);
            }
        }
        return map;
    }
}
// java: Word Count
// Latest recommended article, published 2024-07-08 00:01:13