Directories and files:
data3(dir)
file1.txt(file)
file2.txt(file)
file3.txt(file)
filter(dir)
filter.txt(file)
File contents:
file1.txt
Free Shipping Mini Car Auto12v Fresh Air Purifier Oxygen Bar
freeshipping freeshipping
file2.txt
freeshipping new Electromagnetic parking sensor no holes need to be drilled
freeshipping
freeshipping
file3.txt
DC 12V 1 to 3 Car Cigarette Lighter Socket Power Adapter Splitter with 1 USB Port free shipping #9622 [aaa bbb] ccc{ ddd}
freeshipping
Filter contents:
filter.txt
sensor
bbb
lighter
auto12v
usb
oxygen
ddd
parking
cigarette
port
1
free
shipping
no
need
Java Code:
WordsAnalysis.java
package com.algorithms.multiple.threads.frequency.word;
/**
* Created with IntelliJ IDEA.
* User: 1O1O
* Date: 2015-04-01
* Time: 19:31 PM
* :)~
* MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY
*/
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Entry point and helpers for a two-worker word-frequency count.
 *
 * <p>{@link #main(String[])} loads the filter words, splits the files of the data directory
 * between two {@code CountWordsThread} workers and starts a {@code MonitorThread} that prints
 * the merged result via {@link #show(AllCountModel)}.
 */
public class WordsAnalysis {
    private static final String FILTER_WORDS_FILE_PATH = "//Users//robot//TEMP//testData//filter//filter.txt";

    /**
     * Punctuation and whitespace that separate words. Compiled once instead of on every
     * {@code String.replaceAll} call.
     */
    private static final java.util.regex.Pattern WORD_SEPARATORS = java.util.regex.Pattern.compile(
            "[`~!@#$%^&*()+=|{}':;',//\\[//\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]|\\s+|\t|\r");

    /** Lower-cased words that must not be counted; filled by {@link #loadFilterWords()}. */
    private static final Set<String> filterWordsSet = new HashSet<String>();

    /**
     * Counts word frequencies of the files in the data directory using two worker threads.
     *
     * @param args unused
     * @throws IOException declared for interface compatibility
     */
    public static void main(String[] args) throws IOException {
        loadFilterWords();

        File dataDir = new File("//Users//robot//TEMP//testData//data3");
        File[] entries = dataDir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list data directory: " + dataDir);
            return;
        }

        // First half of the listing goes to worker 1, the rest to worker 2.
        int half = entries.length / 2;
        List<File> files1 = new ArrayList<File>();
        List<File> files2 = new ArrayList<File>();
        for (int i = 0; i < entries.length; i++) {
            if (i < half) {
                files1.add(entries[i]);
            } else {
                files2.add(entries[i]);
            }
        }

        AllCountModel acm = new AllCountModel();
        // Register both workers BEFORE starting either one. Raising the count after worker 1
        // is already running could overwrite its decrement, so the count would never reach
        // zero and the monitor would never finish.
        acm.setThreadCount(2);

        CountWordsThread tt1 = new CountWordsThread(files1, acm);
        System.out.println("Thread 1: start!");
        tt1.start();

        CountWordsThread tt2 = new CountWordsThread(files2, acm);
        System.out.println("Thread 2: start!");
        tt2.start();

        MonitorThread mt = new MonitorThread(acm);
        System.out.println("Thread Monitor: start!");
        mt.start();
    }

    /**
     * Adds the word counts of one file to {@code wordsMap}.
     *
     * <p>The text is lower-cased, separators are replaced by spaces, and every remaining
     * non-empty token that is not a filter word is counted.
     *
     * @param file file to read
     * @param wordsMap map from word to count; updated in place
     * @return the same {@code wordsMap} instance
     * @throws IOException declared for interface compatibility; read errors are reported by
     *     {@link #readFile(File)} and not rethrown
     */
    public Map<String, Integer> countWords(File file, Map<String, Integer> wordsMap) throws IOException {
        String text = readFile(file).toLowerCase(Locale.ROOT);
        text = WORD_SEPARATORS.matcher(text).replaceAll(" ");
        String[] words = text.split("\\s+");
        for (String rawWord : words) {
            String word = rawWord.trim();
            // readFile() prefixes every line with a space, so split() yields a leading empty
            // token; skip it explicitly instead of relying on the filter set.
            if (word.length() == 0 || filterWordsSet.contains(word)) {
                continue;
            }
            Integer seen = wordsMap.get(word);
            wordsMap.put(word, seen == null ? 1 : seen + 1);
        }
        return wordsMap;
    }

    /**
     * Returns the entries of {@code dataHash} sorted by descending count.
     *
     * <p>Entries with equal counts compare as equal, so the comparator honours the
     * {@link Comparator} contract; entries with a {@code null} count sort last.
     *
     * @param dataHash map from word to count
     * @return a new list holding the map's entries in sorted order
     */
    public static List<Map.Entry<String, Integer>> hashSort(Map<String, Integer> dataHash) {
        List<Map.Entry<String, Integer>> sorted =
                new ArrayList<Map.Entry<String, Integer>>(dataHash.entrySet());
        Collections.sort(sorted, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                Integer v1 = o1.getValue();
                Integer v2 = o2.getValue();
                if (v1 == null) {
                    return v2 == null ? 0 : 1;
                }
                if (v2 == null) {
                    return -1;
                }
                return v2.compareTo(v1);
            }
        });
        return sorted;
    }

    /**
     * Prints the result table.
     *
     * @param acm the shared result set
     */
    public static void show(AllCountModel acm) {
        System.out.println("Number of threads left: " + acm.getThreadCount());
        List<Map.Entry<String, Integer>> rows = hashSort(acm.getDataHash());
        System.out.println("Start: write word and frequency");
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("========================" + df.format(new Date()) + "=========================");
        System.out.println(String.format("%-15s", "word number") + String.format("%-30s", "word") + String.format("%-15s", "frequency"));
        int number = 1;
        for (Map.Entry<String, Integer> row : rows) {
            System.out.println(String.format("%-15d", number++)
                    + String.format("%-30s", row.getKey())
                    + String.format("%-15d", row.getValue()));
        }
        System.out.println("End: write word and frequency");
    }

    /**
     * Loads the filter words from {@code FILTER_WORDS_FILE_PATH} into the filter set.
     *
     * <p>Words are lower-cased because {@link #countWords(File, Map)} compares them against
     * lower-cased text. Empty tokens are skipped, so the printed number is the number of
     * distinct filter words currently loaded.
     */
    public static void loadFilterWords() {
        String filterWordsText = readFileByPath(FILTER_WORDS_FILE_PATH);
        for (String word : filterWordsText.split("\\s+")) {
            if (word.length() > 0) {
                filterWordsSet.add(word.toLowerCase(Locale.ROOT));
            }
        }
        System.out.println("Number of filter words: " + filterWordsSet.size());
    }

    /**
     * Reads the file at {@code filePath}; see {@link #readFile(File)} for the format.
     *
     * @param filePath path of the file to read
     * @return the file content, or what was read before an I/O error occurred
     */
    public static String readFileByPath(String filePath) {
        return readFile(new File(filePath));
    }

    /**
     * Reads {@code file} line by line and joins the lines, each one prefixed with a single
     * space and without its line terminator.
     *
     * <p>Reading is best effort: an I/O error is reported on standard error and the text read
     * so far (possibly empty) is returned.
     *
     * @param file file to read
     * @return the joined content
     */
    public static String readFile(File file) {
        StringBuilder result = new StringBuilder();
        BufferedReader reader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset - confirm
            // that this matches the encoding of the data files.
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(" ");
                result.append(line);
            }
        } catch (IOException e) {
            System.err.println("Failed to read " + file);
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a reader fails.
                }
            }
        }
        return result.toString();
    }
}
AllCountModel.java
package com.algorithms.multiple.threads.frequency.word;
/**
* Created with IntelliJ IDEA.
* User: 1O1O
* Date: 2015-04-01
* Time: 19:31 PM
* :)~
* MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY
*/
import java.util.HashMap;
import java.util.Map;
public class AllCountModel {
private int threadCount;
private static Map<String, Integer> dataHash = new HashMap<String, Integer>();
public int getThreadCount () {
return threadCount;
}
public void setThreadCount (int threadCount) {
this .threadCount = threadCount;
}
public Map<String, Integer> getDataHash (){
return dataHash;
}
public void setDataHash (Map<String, Integer> wordsMap){
for (String key : wordsMap.keySet()) {
if ((this .dataHash.get(key) != null )) {
int value = ((Integer) this .dataHash.get(key)).intValue()+((Integer) wordsMap.get(key)).intValue();
this .dataHash.put(key, new Integer(value));
} else if ((this .dataHash.get(key) == null ) ){
this .dataHash.put(key, ((Integer) wordsMap.get(key)).intValue());
}
}
}
}
CountWordsThread.java
package com.algorithms.multiple.threads.frequency.word;
/**
* Created with IntelliJ IDEA.
* User: 1O1O
* Date: 2015-04-01
* Time: 19:31 PM
* :)~
* MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
/**
 * Worker thread that counts the words of its files into a private map and then merges that
 * map into the shared {@link AllCountModel}.
 */
public class CountWordsThread extends Thread {
    private final List<File> files;
    private Map<String, Integer> wordsMap = new HashMap<String, Integer>();
    private final AllCountModel allCountModel;

    /**
     * @param files files this worker should process; {@code null} is treated as no files
     * @param allCountModel shared result holder that receives this worker's counts
     */
    public CountWordsThread(List<File> files, AllCountModel allCountModel) {
        this.files = (files != null) ? files : new ArrayList<File>();
        this.allCountModel = allCountModel;
    }

    /**
     * Counts every file, then publishes the counts and signs off from the shared model.
     */
    @Override
    public void run() {
        try {
            WordsAnalysis wa = new WordsAnalysis();
            for (File file : files) {
                try {
                    wordsMap = wa.countWords(file, wordsMap);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        } finally {
            // Always sign off, even if counting failed unexpectedly; otherwise the running
            // count never reaches zero and whoever waits on it waits forever.
            synchronized (allCountModel) {
                // Merge first and decrement last: a poller that sees the count reach zero
                // must find the complete result already in place.
                allCountModel.setDataHash(wordsMap);
                System.out.println("Thread: stop!");
                allCountModel.setThreadCount(allCountModel.getThreadCount() - 1);
            }
        }
    }
}
MonitorThread.java
package com.algorithms.multiple.threads.frequency.word;
/**
* Created with IntelliJ IDEA.
* User: 1O1O
* Date: 2015-04-01
* Time: 19:31 PM
* :)~
* MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY
*/
/**
 * Polls the shared {@link AllCountModel} every 500 ms and prints the result table once no
 * worker is registered as running any more.
 */
public class MonitorThread extends Thread {
    private final AllCountModel acm;

    /**
     * @param acm shared result holder to watch
     */
    public MonitorThread(AllCountModel acm) {
        this.acm = acm;
    }

    /**
     * Sleeps and re-checks the running-worker count until it drops to zero or below, then
     * prints the results and ends. If the thread is interrupted it restores the interrupt
     * flag and ends without printing.
     */
    @Override
    public void run() {
        while (true) {
            try {
                sleep(500);
            } catch (InterruptedException e) {
                // Treat interruption as a request to stop instead of swallowing it.
                Thread.currentThread().interrupt();
                return;
            }

            boolean finished;
            // Read the counter under the same monitor the workers hold while updating the
            // model, so their writes are visible to this thread.
            synchronized (acm) {
                finished = acm.getThreadCount() <= 0;
            }
            if (finished) {
                WordsAnalysis.show(acm);
                System.out.println("Thread Monitor: end!");
                return;
            }
        }
    }
}
Outputs:
Number of filter words: 16
Thread 1: start!
Thread 2: start!
Thread Monitor: start!
Thread: stop!
Thread: stop!
Number of threads left: 0
Start: write word and frequency
========================2015-04-01 19:49:12=========================
word number word frequency
1 freeshipping 6
2 to 2
3 car 2
4 be 1
5 mini 1
6 holes 1
7 bar 1
8 9622 1
9 air 1
10 ccc 1
11 power 1
12 socket 1
13 drilled 1
14 fresh 1
15 electromagnetic 1
16 new 1
17 3 1
18 splitter 1
19 purifier 1
20 adapter 1
21 12v 1
22 aaa 1
23 with 1
24 dc 1
End: write word and frequency
Thread Monitor: end!