Java小案例(多线程实现WordCount)
先从网上找一篇英语文章:
字数太少,复制粘贴几次:
我们划分线程的策略是:每读取30万行文本,就启动一个新的线程来统计这部分内容。首先定义一个线程任务类:
import java.util.Map;
import java.util.TreeMap;
/**
 * Word-counting task: splits the text passed to the constructor into words and
 * tallies how often each word occurs in a map owned by this task.
 *
 * <p>The result map is a plain {@link TreeMap} and is not synchronized; read it
 * through {@link #getMap()} only after {@link #run()} has completed.
 */
public class Count implements Runnable {
    /**
     * One or more consecutive delimiter characters: comma, period, double quote
     * or whitespace. Inside a character class '|' and '+' are literals, so they
     * are deliberately left out, and the quantifier sits outside the brackets so
     * that a run such as ", " is treated as a single separator.
     */
    private static final String DELIMITERS = "[,.\"\\s]+";

    private final String context;
    private final Map<String, Integer> map;

    /**
     * @param context the text whose words are to be counted
     */
    public Count(String context) {
        this.context = context;
        map = new TreeMap<>();
    }

    /**
     * @return the word-to-occurrence map filled by {@link #run()}
     */
    public Map<String, Integer> getMap() {
        return map;
    }

    /**
     * Splits the text into words and records the number of occurrences of each
     * word in this task's own map.
     */
    @Override
    public void run() {
        for (String word : context.split(DELIMITERS)) {
            // A leading delimiter (or an empty text) yields an empty first token;
            // it is not a word and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = map.get(word);
            map.put(word, seen == null ? 1 : seen + 1);
        }
    }
}
主线程:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Multithreaded word count, chunked by line count: the input file is read
 * sequentially and every {@link #LINES_PER_TASK} lines are handed to a new
 * {@code Count} task. Once all tasks have finished, the per-task maps are
 * merged into one sorted map and printed.
 */
public class WordCount {
    // Number of lines collected before a new counting task is submitted.
    private static final int LINES_PER_TASK = 300000;
    // Number of lines read so far.
    private static int number = 0;
    // Pool that runs the counting tasks.
    private static final ExecutorService es = Executors.newCachedThreadPool();
    // Result map of each task, keyed by the task's name.
    private static final Map<String, Map<String, Integer>> threadMap = new TreeMap<>();
    // Buffer holding the lines of the chunk currently being collected.
    private static final StringBuffer sb = new StringBuffer();

    /**
     * Reads {@code test_data/article.txt}, counts its words with one task per
     * chunk of lines, then prints every word with its count and the elapsed time.
     *
     * @param args unused
     * @throws RuntimeException if the input file cannot be opened
     * @throws IllegalStateException if the main thread is interrupted while
     *         waiting for the counting tasks
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis(); // for the total elapsed time

        // try-with-resources closes the reader (and the wrapped FileReader) on
        // every path, including when reading fails part-way through.
        try (BufferedReader br = new BufferedReader(new FileReader("test_data/article.txt"))) {
            String tmp;
            while ((tmp = br.readLine()) != null) {
                number++;
                sb.append(tmp).append(" ");
                // A full chunk has been collected: hand it to a new task.
                if (number % LINES_PER_TASK == 0) {
                    addThread(sb.toString(), "Thread - " + (number / LINES_PER_TASK));
                }
            }
        } catch (FileNotFoundException e) {
            // Nothing can be counted without the input; keep the cause attached.
            throw new RuntimeException("初始化异常", e);
        } catch (IOException e) {
            // Best effort: whatever was read before the failure is still counted.
            e.printStackTrace();
        }

        // Lines left over after the last full chunk get a task of their own.
        // The parentheses matter: without them "+ 1" is string concatenation.
        if (sb.length() > 0) {
            addThread(sb.toString(), "Thread - " + (number / LINES_PER_TASK + 1));
        }

        // Stop accepting tasks and block (instead of spinning) until all are done.
        es.shutdown();
        try {
            es.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            es.shutdownNow();
            throw new IllegalStateException("interrupted while waiting for word-count tasks", e);
        }

        // Merge the per-task results recorded in threadMap into the final map.
        Map<String, Integer> wordMap = new TreeMap<>();
        for (Map<String, Integer> tMap : threadMap.values()) {
            for (Map.Entry<String, Integer> entry : tMap.entrySet()) {
                Integer soFar = wordMap.get(entry.getKey());
                wordMap.put(entry.getKey(),
                        soFar == null ? entry.getValue() : soFar + entry.getValue());
            }
        }

        // Print the result.
        for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }

        long end = System.currentTimeMillis();
        System.out.println("本次计算耗时:" + (end - start)+" ms");
    }

    /**
     * Submits a counting task for one chunk of text and clears the chunk buffer.
     *
     * @param context    the text of the chunk to count
     * @param threadName key under which the task's result map is stored in
     *                   {@code threadMap}
     */
    private static void addThread(String context, String threadName) {
        Count cnt = new Count(context);
        threadMap.put(threadName, cnt.getMap());
        // Submit the task itself; wrapping it in a Thread that is never started
        // adds nothing, because the pool runs it on its own worker thread.
        es.execute(cnt);
        sb.setLength(0);
    }
}
输出结果:
life : 54400
As : 108800
But : 54400
......
your : 108800
本次计算耗时:7823 ms
我们换一个线程任务划分策略:计划使用10个线程完成任务,第一行分配给第一个线程,第二行分配给第二个线程,依此类推,第十一行再回到第一个线程,即按行号对10取模,把所有行轮流分配给这10个线程。
重写线程任务类:
import java.io.BufferedReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Word-counting task for the round-robin strategy: reads every line from its
 * reader but only counts the words of lines whose zero-based index modulo
 * {@link #WORKER_COUNT} equals this task's number.
 *
 * <p>The result map is a plain {@link HashMap} and is not synchronized; read it
 * through {@link #getMap()} only after {@link #run()} has completed.
 */
public class Count2 implements Runnable {
    /**
     * One or more consecutive delimiter characters: comma, period, double quote
     * or whitespace. Inside a character class '|' and '+' are literals, so they
     * are deliberately left out, and the quantifier sits outside the brackets so
     * that a run such as ", " is treated as a single separator.
     */
    private static final String DELIMITERS = "[,.\"\\s]+";
    // Number of tasks the lines are distributed over.
    private static final int WORKER_COUNT = 10;

    // The work can no longer be handed over as ready-made text, so each task
    // reads the file through a reader of its own.
    private final BufferedReader br;
    // Task number: selects which lines this task is responsible for.
    private final int no;
    // Map in which this task stores its result.
    private final Map<String, Integer> map = new HashMap<>();

    /**
     * @param br reader over the text to count
     * @param no task number; lines with {@code index % 10 == no} are counted
     */
    public Count2(BufferedReader br, int no) {
        this.br = br;
        this.no = no;
    }

    /**
     * @return the word-to-occurrence map filled by {@link #run()}
     */
    public Map<String, Integer> getMap() {
        return map;
    }

    /**
     * Reads the input line by line and counts the words of the lines assigned
     * to this task. An {@link IOException} ends the reading; its stack trace is
     * printed and the counts gathered so far are kept.
     */
    @Override
    public void run() {
        String line;
        int lineIndex = 0;
        try {
            while ((line = br.readLine()) != null) {
                if (lineIndex % WORKER_COUNT == no) {
                    countWords(line);
                }
                lineIndex++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Adds the words of one line to this task's map, ignoring empty tokens.
    private void countWords(String line) {
        for (String word : line.split(DELIMITERS)) {
            // A leading delimiter (or an empty line) yields an empty first token.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = map.get(word);
            map.put(word, seen == null ? 1 : seen + 1);
        }
    }
}
主线程:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Multithreaded word count, round-robin by line: {@link #WORKER_COUNT}
 * {@code Count2} tasks each read the whole file through a reader of their own
 * and count only the lines assigned to them. Once all tasks have finished, the
 * per-task maps are merged and printed.
 */
public class WordCount2 {
    // Number of tasks; Count2 distributes lines by "line index % 10", so this
    // value has to stay in step with that modulus.
    private static final int WORKER_COUNT = 10;
    private static final String path = "test_data/article.txt";
    // Result map of each task, keyed by the task's name.
    private static final Map<String, Map<String, Integer>> threadMap = new HashMap<>();
    // Merged result over all tasks.
    private static final Map<String, Integer> wordMap = new HashMap<>();
    private static final ExecutorService es = Executors.newCachedThreadPool();

    /**
     * Starts the counting tasks, waits for them, merges their results and
     * prints every word with its count followed by the elapsed time.
     *
     * @param args unused
     * @throws IllegalStateException if the main thread is interrupted while
     *         waiting for the counting tasks
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();

        // Keep track of every reader that was opened so all of them can be
        // closed once the tasks are done.
        BufferedReader[] readers = new BufferedReader[WORKER_COUNT];
        try {
            for (int i = 0; i < WORKER_COUNT; i++) {
                readers[i] = new BufferedReader(new FileReader(path));
                Count2 c = new Count2(readers[i], i);
                threadMap.put("Thread-" + i, c.getMap());
                // Submit the task itself; the pool supplies the worker thread.
                es.execute(c);
            }
        } catch (IOException e) {
            // Best effort: tasks submitted before the failure still run.
            e.printStackTrace();
        }

        // Stop accepting tasks and block (instead of spinning) until all are done.
        es.shutdown();
        try {
            es.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            es.shutdownNow();
            throw new IllegalStateException("interrupted while waiting for word-count tasks", e);
        } finally {
            closeAll(readers);
        }

        // Merge the per-task results into wordMap.
        for (Map<String, Integer> eachMap : threadMap.values()) {
            for (Map.Entry<String, Integer> entry : eachMap.entrySet()) {
                Integer soFar = wordMap.get(entry.getKey());
                wordMap.put(entry.getKey(),
                        soFar == null ? entry.getValue() : soFar + entry.getValue());
            }
        }

        for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }

        long end = System.currentTimeMillis();
        System.out.println("总计耗时:" + (end - start) + "ms");
    }

    // Closes every reader that was actually opened; a failure to close one
    // reader is reported and does not prevent closing the others.
    private static void closeAll(BufferedReader[] readers) {
        for (BufferedReader reader : readers) {
            if (reader == null) {
                continue;
            }
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
输出结果:
Each:108800
full-time:54400
......
challenge:54400
growth:54400
总计耗时:1900ms
可以再将结果写入新的文件中:
/**
 * Writes every entry of the map to a text file, one {@code key:value} pair per
 * line, in the map's iteration order.
 *
 * @param map  the word counts to write
 * @param path the file to write to
 * @throws IOException if the file cannot be opened or written
 */
private static void writeMap2File(Map<String, Integer> map, String path) throws IOException {
    // try-with-resources flushes and closes the writer even when a write fails,
    // so the file handle is never leaked.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(path))) {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            bw.write(entry.getKey() + ":" + entry.getValue());
            bw.newLine();
        }
    }
}