用 Java 实现对 .java 源文件中英文词汇的统计
- 使用递归遍历多级目录
- 使用正则表达式匹配英文单词
package cn.ex;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class JavaWordNum {
static int num = 0;
public static Map<String, Integer> map = new HashMap<>();
public static void main(String[] args) throws IOException {
File src = new File("D:\\Java\\eclipse-workspace");
count(src);
System.out.println(map.size());
StringBuffer sb = new StringBuffer();
for (String s : map.keySet()) {
if (map.get(s) < 50)
continue;
sb.append(s + " ");
++num;
System.out.printf("%s;%d\n", s, map.get(s));
}
FileWriter fw = new FileWriter("d:/java.txt");
fw.write(sb.toString());
fw.close();
System.out.println(num);
}
public static void count(File file) throws IOException {
Pattern p = Pattern.compile("[a-zA-Z]+");
if (file.isDirectory()) {
File[] f = file.listFiles();
for (File fi : f) {
if (fi.isDirectory()) {
count(fi);
} else if (fi.getName().endsWith(".java")) {
BufferedReader br = new BufferedReader(new FileReader(fi));
br.lines().forEach(e -> {
if (e.trim().length() > 0) {
Matcher m = p.matcher(e);
while (m.find()) {
String s = m.group();
if (map.containsKey(s)) {
map.put(s, map.get(s) + 1);
} else {
map.put(s, 1);
}
}
}
});
br.close();
}
}
} else if (file.getName().endsWith(".java")) {
BufferedReader br = new BufferedReader(new FileReader(file));
br.lines().forEach(e -> {
if (e.trim().length() > 0) {
Matcher m = p.matcher(e);
while (m.find()) {
String s = m.group();
if (map.containsKey(s)) {
map.put(s, map.get(s) + 1);
} else {
map.put(s, 1);
}
}
}
});
br.close();
}
}
}