找出多个文本中频率高的单词（2）

最新推荐文章于 2018-04-03 20:33:05 发布

AndersZhuo123

最新推荐文章于 2018-04-03 20:33:05 发布

阅读量1.1k

点赞数

分类专栏： java多线程

本文链接：https://blog.csdn.net/anders_zhuo/article/details/8515363

版权

java多线程专栏收录该内容

5 篇文章 0 订阅

订阅专栏

接上篇，我打算用用concurrent包里的CountDownLatch类去实现。

还是直接上代码吧：

Main.java

package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Entry point of the word-frequency program: starts a fixed number of
 * {@link SingleThreadStatistics} workers, waits for all of them on a
 * {@link CountDownLatch}, then merges and prints their per-thread counts.
 */
public class Main {

	/**
	 * Runs the workers and prints the merged word counts.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));

		ExecutorService es = Executors.newFixedThreadPool(threadNumber);
		SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
		try {
			CountDownLatch doneSignals = new CountDownLatch(threadNumber);

			// Written for the case of more files than threads. With fewer files the
			// surplus workers simply receive no file and finish immediately.
			for (int i = 0; i < threadNumber; i++) {
				threads[i] = new SingleThreadStatistics(doneSignals);
				es.execute(threads[i]);
			}

			// Block until every worker has counted down; only then are the
			// per-thread maps complete and safe to read from this thread.
			doneSignals.await();

			Map<String, Integer> map = mergeThreadMap(threads);

			display(map);

		} catch (InterruptedException e) {
			// Restore the interrupt status so callers further up can still see it.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} finally {
			es.shutdown();
		}

	}

	/**
	 * Sums the per-thread word counts into one map.
	 *
	 * @param threads the finished workers whose maps are merged
	 * @return a new map from word to total count over all workers
	 */
	private static Map<String, Integer> mergeThreadMap(SingleThreadStatistics[] threads) {
		Map<String, Integer> map = new HashMap<String, Integer>();

		for (SingleThreadStatistics singleThreadStatistics : threads) {
			Map<String, Integer> threadMap = singleThreadStatistics.getMap();

			for (Map.Entry<String, Integer> entry : threadMap.entrySet()) {
				String threadWord = entry.getKey();
				Integer threadWordCount = entry.getValue();
				Integer wordCount = map.get(threadWord);

				if (wordCount == null) {
					map.put(threadWord, threadWordCount);
				} else {
					map.put(threadWord, threadWordCount + wordCount);
				}
			}
		}

		return map;
	}

	/**
	 * Prints every entry to standard output, one word and its count per line.
	 *
	 * @param map the merged word counts
	 */
	private static void display(Map<String, Integer> map) {

		for (Map.Entry<String, Integer> entry : map.entrySet()) {
			System.out.print(entry.getKey());
			System.out.println("   ," + entry.getValue());
		}

	}

}

SingleThreadStatistics.java

package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

/**
 * Worker task that repeatedly takes the next file from {@link FileManager},
 * counts its words into a private map, and signals a {@link CountDownLatch}
 * when no files are left.
 */
public class SingleThreadStatistics implements Runnable {

	/** Word counts gathered by this worker only. */
	private final Map<String, Integer> map = new HashMap<String, Integer>();
	/** Latch counted down exactly once when this worker finishes. */
	private final CountDownLatch doneSignals;

	/**
	 * @param doneSignals latch to count down when this worker has finished
	 */
	public SingleThreadStatistics(CountDownLatch doneSignals) {
		this.doneSignals = doneSignals;
	}

	/**
	 * Processes files until {@link FileManager#getFile()} returns {@code null},
	 * then counts the latch down.
	 */
	@Override
	public void run() {

		try {
			while (true) {
				File file = FileManager.getFile();
				if (file == null) {
					break;
				}
				FileManager.parseFile(file, map);
			}
		} finally {
			// Always signal completion, even if an unchecked exception escapes the
			// loop; otherwise the thread waiting on the latch would block forever.
			doneSignals.countDown();
		}

	}

	// --------getter/setter------------

	/**
	 * @return the live map of word counts collected by this worker (not a copy)
	 */
	public Map<String, Integer> getMap() {
		return map;
	}

}

FileManager.java

package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manages the input files: hands out one file at a time to the worker threads
 * and counts the English words contained in a file.
 * 
 * @author Anders
 * 
 */
public class FileManager {

	/** Files selected for processing; filled once by the static initializer. */
	private static final List<File> fileList;
	/** Position of the next file to hand out; only accessed inside {@link #getFile()}. */
	private static int index = 0;

	static {
		String dirPath = PropertiesUtil.get("DirName");
		String path = FileManager.class.getClassLoader().getResource(dirPath).getPath();
		fileList = getFiles(path);
	}

	/**
	 * Returns the next file that has not been handed out yet.
	 * <p>
	 * The method is synchronized so that the end-of-list check and the index
	 * increment form one atomic step and no two callers receive the same file.
	 *
	 * @return the next file, or {@code null} when every file has been handed out
	 */
	public synchronized static File getFile() {
		if (index == fileList.size()) {
			return null;
		}
		File file = fileList.get(index);
		index++;
		return file;
	}

	/**
	 * Lists the entries of a directory whose names match the regular expression
	 * stored in the {@code FileType} property.
	 *
	 * @param dirPath path of the directory to scan
	 * @return the matching entries; empty if the path is not an existing
	 *         directory or the directory cannot be listed
	 */
	private static List<File> getFiles(String dirPath) {

		File dir = new File(dirPath);
		if (!dir.exists() || !dir.isDirectory()) {
			return Collections.emptyList();
		}

		File[] files = dir.listFiles();
		if (files == null) {
			// listFiles() returns null when the directory cannot be read.
			return Collections.emptyList();
		}

		// Keep only entries whose name matches the configured pattern (e.g. *.txt files).
		Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
		List<File> list = new ArrayList<File>();

		for (File file : files) {
			Matcher matcher = pattern.matcher(file.getName());
			if (matcher.matches()) {
				list.add(file);
			}
		}

		return list;
	}

	/**
	 * Reads a file through a {@link FileChannel} and {@link ByteBuffer} and adds
	 * the count of every word (maximal run of ASCII letters) to {@code map}.
	 * <p>
	 * Failures are reported with {@code printStackTrace()} and otherwise ignored,
	 * so one unreadable file does not stop the calling worker.
	 *
	 * @param file the file to read
	 * @param map  word-to-count map that is updated in place
	 */
	public static void parseFile(File file, Map<String, Integer> map) {
		FileInputStream ins = null;
		try {
			ins = new FileInputStream(file);
			FileChannel fIns = ins.getChannel();
			ByteBuffer buffer = ByteBuffer.allocate(1024);
			// Letters of the word currently being read. It lives across buffer
			// refills so a word straddling two 1024-byte chunks is not cut in half.
			StringBuilder word = new StringBuilder();

			while (true) {
				buffer.clear();
				int r = fIns.read(buffer);
				if (r == -1) {
					break;
				}
				buffer.flip();
				buffer2word(buffer, word, map);
			}
			// The file may end in the middle of a word; count that last word too.
			flushWord(word, map);

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				if (ins != null) {
					// Closing the stream also closes its associated channel.
					ins.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	/**
	 * Consumes the readable bytes of {@code buffer}: letters are appended to
	 * {@code word}, any other byte ends the current word and records it.
	 *
	 * @param buffer buffer in read mode (already flipped)
	 * @param word   accumulator for the word in progress, shared across calls
	 * @param map    word-to-count map that is updated in place
	 */
	private static void buffer2word(ByteBuffer buffer, StringBuilder word, Map<String, Integer> map) {
		while (buffer.hasRemaining()) {
			byte b = buffer.get();
			if (isEnglishChar(b)) {
				word.append((char) b);
			} else {
				flushWord(word, map);
			}
		}
	}

	/**
	 * Records the accumulated word, if any, and resets the accumulator.
	 * Empty accumulators are skipped so consecutive separators do not count
	 * the empty string as a word.
	 */
	private static void flushWord(StringBuilder word, Map<String, Integer> map) {
		if (word.length() > 0) {
			word2map(word.toString(), map);
			word.setLength(0);
		}
	}

	/**
	 * Increments the count of {@code word} in {@code map}, starting at 1.
	 */
	private static void word2map(String word, Map<String, Integer> map) {
		Integer count = map.get(word);
		if (null == count) {
			map.put(word, 1);
		} else {
			map.put(word, count + 1);
		}
	}

	/**
	 * Tells whether a byte is an ASCII letter, {@code A-Z} or {@code a-z},
	 * both ends inclusive.
	 */
	private static boolean isEnglishChar(byte b) {
		return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z');
	}

}

config.properties

ThreadNumber=3
DirName=txt
FileType=.*.txt

以上是使用 CountDownLatch 实现的，当然也可以使用 Future + ExecutorService 来实现，下一篇就会介绍这种方式。

其实我觉得最重要的代码是 FileManager里的

public synchronized static File getFile() {
		// The end-of-list check and the increment run under the same lock,
		// so each caller receives a different file (or null when none are left).
		File next = null;
		if (index != fileList.size()) {
			next = fileList.get(index);
			index += 1;
		}
		return next;
	}

这部分代码，因为只要每个thread 分别得到不同的文件，就可以了。

还有很重要的一点：判断 index 是否已经到达文件列表末尾（即所有文件都已被取走）的检查，必须和 index++ 放在同一个同步块里面，否则会引起线程安全问题。

AndersZhuo123

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
3
评论
找出多个文本中频率高的单词（2）

接上篇，我打算用用concurrent包里的CountDownLatch类去实现。还是直接上代码吧：Main.javapackage com.anders.thread;import java.util.HashMap;import java.util.Map;import java.util.concurrent.CountDownLatch;import ja
复制链接

扫一扫